From: kaf24@scramble.cl.cam.ac.uk Date: Mon, 26 Apr 2004 10:02:49 +0000 (+0000) Subject: bitkeeper revision 1.878 (408cde49uOK-SAzfb5CBmpUoC0yXOg) X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~18243 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks://%22Dat/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22Dat?a=commitdiff_plain;h=112600e25645a55218690c4c549e673546f7cb08;p=xen.git bitkeeper revision 1.878 (408cde49uOK-SAzfb5CBmpUoC0yXOg) Dir renames. --- diff --git a/.rootkeys b/.rootkeys index f81687053e..74cc58825a 100644 --- a/.rootkeys +++ b/.rootkeys @@ -656,6 +656,18 @@ 40648526SxcA4lGIHB_k7ID8VlRSzw xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev 3e6377f5xwPfYZkPHPrDbEq1PRN7uQ xenolinux-2.4.26-sparse/arch/xen/drivers/balloon/Makefile 3e6377f8Me8IqtvEhb70XFgOvqQH7A xenolinux-2.4.26-sparse/arch/xen/drivers/balloon/balloon.c +4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile +4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile +4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h +4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c +4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c +4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c +4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c +40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h +4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile +4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c +4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h +4075806dibjCcfuXv6CINMhxWTw3jQ 
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c 3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile 3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c 3e67f822FOPwqHiaRKbrskgWgoNL5g xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.h @@ -667,25 +679,13 @@ 3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/vfr.c 40420a6ebRqDjufoN1WSJvolEW2Wjw xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/Makefile 40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/evtchn.c +4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile +4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile +4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c +405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile +405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c 3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.26-sparse/arch/xen/drivers/network/Makefile 3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c -4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile -4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile -4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h -4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c -4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c -4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c -4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c -4075806dI5kfeMD5RV-DA0PYoThx_w 
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile -4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c -4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h -4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c -40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h -4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile -4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile -4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c -405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile -405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.26-sparse/arch/xen/kernel/Makefile 4075806dE5mQwlVUf8-t3YXjiMMWDQ xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c 3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.26-sparse/arch/xen/kernel/entry.S diff --git a/xenolinux-2.4.26-sparse/arch/xen/Makefile b/xenolinux-2.4.26-sparse/arch/xen/Makefile index 3abb3c7421..d799c003bb 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/Makefile +++ b/xenolinux-2.4.26-sparse/arch/xen/Makefile @@ -52,8 +52,8 @@ SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib SUBDIRS += arch/xen/drivers/console SUBDIRS += arch/xen/drivers/evtchn ifdef CONFIG_XEN_NEWIO -SUBDIRS += arch/xen/drivers/vblkif -SUBDIRS += arch/xen/drivers/vnetif +SUBDIRS += arch/xen/drivers/blkif +SUBDIRS += arch/xen/drivers/netif else SUBDIRS += arch/xen/drivers/block SUBDIRS += arch/xen/drivers/network @@ -67,8 +67,8 @@ CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o CORE_FILES += arch/xen/drivers/evtchn/drv.o CORE_FILES += arch/xen/drivers/console/drv.o ifdef CONFIG_XEN_NEWIO -CORE_FILES += 
arch/xen/drivers/vblkif/drv.o -CORE_FILES += arch/xen/drivers/vnetif/drv.o +CORE_FILES += arch/xen/drivers/blkif/drv.o +CORE_FILES += arch/xen/drivers/netif/drv.o else CORE_FILES += arch/xen/drivers/block/drv.o CORE_FILES += arch/xen/drivers/network/drv.o diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile new file mode 100644 index 0000000000..20c8192d3d --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile @@ -0,0 +1,10 @@ + +O_TARGET := drv.o + +subdir-y += frontend +obj-y += frontend/drv.o + +subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend +obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile new file mode 100644 index 0000000000..4c8c17367c --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := main.o control.o interface.o vbd.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h new file mode 100644 index 0000000000..4895172937 --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h @@ -0,0 +1,98 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/common.h + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include "../blkif.h" + +#ifndef NDEBUG +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#define DPRINTK(_f, _a...) 
printk("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define ASSERT(_p) ((void)0) +#define DPRINTK(_f, _a...) ((void)0) +#endif + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned long shmem_frame; + unsigned int evtchn; + int irq; + /* Comms information. */ + blk_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ + BLK_RING_IDX blk_req_cons; /* Request consumer. */ + BLK_RING_IDX blk_resp_prod; /* Private version of response producer. */ + /* VBDs attached to this interface. */ + rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */ + spinlock_t vbd_lock; /* Protects VBD mapping. */ + /* Private fields. */ + struct blkif_st *hash_next; + struct list_head blkdev_list; + spinlock_t blk_ring_lock; +} blkif_t; + +void blkif_create(blkif_create_t *create); +void blkif_destroy(blkif_destroy_t *destroy); +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); +void blkif_get(blkif_t *blkif); +void blkif_put(blkif_t *blkif); + +/* An entry in a list of xen_extents. 
*/ +typedef struct _blkif_extent_le { + blkif_extent_t extent; /* an individual extent */ + struct _blkif_extent_le *next; /* and a pointer to the next */ +} blkif_extent_le_t; + +typedef struct _vbd { + blkif_vdev_t vdevice; /* what the domain refers to this vbd as */ + unsigned char mode; /* VBD_MODE_{R,W} */ + unsigned char type; /* XD_TYPE_xxx */ + blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */ + rb_node_t rb; /* for linking into R-B tree lookup struct */ +} vbd_t; + +long vbd_create(blkif_vbd_create_t *create_params); +long vbd_grow(blkif_vbd_grow_t *grow_params); +long vbd_shrink(blkif_vbd_shrink_t *shrink_params); +long vbd_destroy(blkif_vbd_destroy_t *delete_params); + +void destroy_all_vbds(struct task_struct *p); + +typedef struct { + blkif_t *blkif; + unsigned long id; + atomic_t pendcnt; + unsigned short operation; + unsigned short status; +} pending_req_t; + +/* Describes a [partial] disk extent (part of a block io request) */ +typedef struct { + unsigned short dev; + unsigned short nr_sects; + unsigned long buffer; + xen_sector_t sector_number; +} phys_seg_t; + +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); + +int blkif_be_controller_init(void); + +void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c new file mode 100644 index 0000000000..c7ef10c3ba --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c @@ -0,0 +1,61 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/control.c + * + * Routines for interfacing with the control plane. 
+ * + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" + +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->subtype ) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_create_t) ) + goto parse_error; + blkif_create((blkif_create_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_destroy_t) ) + goto parse_error; + blkif_destroy((blkif_destroy_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_VBD_CREATE: + if ( msg->length != sizeof(blkif_vbd_create_t) ) + goto parse_error; + vbd_create((blkif_vbd_create_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_VBD_DESTROY: + if ( msg->length != sizeof(blkif_vbd_destroy_t) ) + goto parse_error; + vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_VBD_GROW: + if ( msg->length != sizeof(blkif_vbd_grow_t) ) + goto parse_error; + vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_VBD_SHRINK: + if ( msg->length != sizeof(blkif_vbd_shrink_t) ) + goto parse_error; + vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]); + break; + default: + goto parse_error; + } + + ctrl_if_send_response(msg); + return; + + parse_error: + msg->length = 0; + ctrl_if_send_response(msg); +} + +int blkif_ctrlif_init(void) +{ + (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx); + return 0; +} diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c new file mode 100644 index 0000000000..579795deb9 --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c @@ -0,0 +1,96 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/interface.c + * + * Block-device interface management. 
+ * + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" + +#define BLKIF_HASHSZ 1024 +#define BLKIF_HASH(_d,_h) \ + (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1)) + +static blkif_t *blkif_hash[BLKIF_HASHSZ]; + +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) +{ + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif != NULL) && + (blkif->domid != domid) && + (blkif->handle != handle) ) + blkif = blkif->hash_next; + return blkif; +} + +void blkif_create(blkif_create_t *create) +{ + domid_t domid = create->domid; + unsigned int handle = create->blkif_handle; + unsigned int evtchn = create->evtchn; + unsigned long shmem_frame = create->shmem_frame; + blkif_t **pblkif, *blkif; + + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( *pblkif == NULL ) + { + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) + goto found_match; + pblkif = &(*pblkif)->hash_next; + } + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + blkif->handle = handle; + blkif->evtchn = evtchn; + blkif->irq = bind_evtchn_to_irq(evtchn); + blkif->shmem_frame = shmem_frame; + blkif->shmem_vbase = ioremap(shmem_frame<vbd_lock); + spin_lock_init(&blkif->blk_ring_lock); + + request_irq(irq, blkif_be_int, 0, "blkif-backend", blkif); + + blkif->hash_next = *pblkif; + *pblkif = blkif; + + create->status = BLKIF_STATUS_OKAY; + return; + + found_match: + create->status = BLKIF_STATUS_INTERFACE_EXISTS; + return; + + evtchn_in_use: + unbind_evtchn_from_irq(evtchn); /* drop refcnt */ + create->status = BLKIF_STATUS_ERROR; + return; +} + +void blkif_destroy(blkif_destroy_t *destroy) +{ + domid_t domid = destroy->domid; + unsigned int handle = destroy->blkif_handle; + blkif_t **pblkif, *blkif; + + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif = *pblkif) == NULL ) + { + if ( (blkif->domid == domid) && (blkif->handle == handle) ) + goto found_match; + 
pblkif = &blkif->hash_next; + } + + destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + return; + + found_match: + free_irq(blkif->irq, NULL); + unbind_evtchn_from_irq(blkif->evtchn); + *pblkif = blkif->hash_next; + kmem_cache_free(blkif_cachep, blkif); + destroy->status = BLKIF_STATUS_OKAY; +} + diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c new file mode 100644 index 0000000000..1e6190c3e6 --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c @@ -0,0 +1,508 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/main.c + * + * Back-end of the driver for virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. A + * reference front-end implementation can be found in: + * arch/xen/drivers/blkif/frontend + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + */ + +#include "common.h" + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. + * + * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW ** + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. + */ +#define MAX_PENDING_REQS 64 +#define BATCH_PER_DOMAIN 16 + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + * + * We can't allocate pending_req's in order, since they may complete out of + * order. 
We therefore maintain an allocation ring. This ring also indicates + * when enough work has been passed down -- at that point the allocation ring + * will be empty. + */ +static pending_req_t pending_reqs[MAX_PENDING_REQS]; +static unsigned char pending_ring[MAX_PENDING_REQS]; +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED; +/* NB. We use a different index type to differentiate from shared blk rings. */ +typedef unsigned int PEND_RING_IDX; +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) +static PEND_RING_IDX pending_prod, pending_cons; +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +static kmem_cache_t *buffer_head_cachep; + +static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned; + +static int lock_buffer(blkif_t *blkif, + unsigned long buffer, + unsigned short size, + int writeable_buffer); +static void unlock_buffer(unsigned long buffer, + unsigned short size, + int writeable_buffer); + +static void io_schedule(unsigned long unused); +static int do_block_io_op(blkif_t *blkif, int max_to_do); +static void dispatch_rw_block_io(blkif_t *blkif, + blk_ring_req_entry_t *req); +static void make_response(blkif_t *blkif, unsigned long id, + unsigned short op, unsigned long st); + + +/****************************************************************** + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE + */ + +static struct list_head io_schedule_list; +static spinlock_t io_schedule_list_lock; + +static int __on_blkdev_list(blkif_t *blkif) +{ + return blkif->blkdev_list.next != NULL; +} + +static void remove_from_blkdev_list(blkif_t *blkif) +{ + unsigned long flags; + if ( !__on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&io_schedule_list_lock, flags); + if ( __on_blkdev_list(blkif) ) + { + list_del(&blkif->blkdev_list); + blkif->blkdev_list.next = NULL; + blkif_put(blkif); + } + spin_unlock_irqrestore(&io_schedule_list_lock, flags); +} + +static void add_to_blkdev_list_tail(blkif_t *blkif) +{ + unsigned long flags; + 
if ( __on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&io_schedule_list_lock, flags); + if ( !__on_blkdev_list(blkif) ) + { + list_add_tail(&blkif->blkdev_list, &io_schedule_list); + blkif_get(blkif); + } + spin_unlock_irqrestore(&io_schedule_list_lock, flags); +} + + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0); + +static void io_schedule(unsigned long unused) +{ + blkif_t *blkif; + struct list_head *ent; + + /* Queue up a batch of requests. */ + while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && + !list_empty(&io_schedule_list) ) + { + ent = io_schedule_list.next; + blkif = list_entry(ent, blkif_t, blkdev_list); + blkif_get(blkif); + remove_from_blkdev_list(blkif); + if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) + add_to_blkdev_list_tail(blkif); + blkif_put(blkif); + } + + /* Push the batch through to disc. */ + run_task_queue(&tq_disk); +} + +static void maybe_trigger_io_schedule(void) +{ + /* + * Needed so that two processes, who together make the following predicate + * true, don't both read stale values and evaluate the predicate + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... + */ + smp_mb(); + + if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && + !list_empty(&io_schedule_list) ) + tasklet_schedule(&io_schedule_tasklet); +} + + + +/****************************************************************** + * COMPLETION CALLBACK -- Called as bh->b_end_io() + */ + +static void end_block_io_op(struct buffer_head *bh, int uptodate) +{ + pending_req_t *pending_req = bh->b_private; + + /* An error fails the entire request. 
*/ + if ( !uptodate ) + { + DPRINTK("Buffer not up-to-date at end of operation\n"); + pending_req->status = 2; + } + + unlock_buffer(virt_to_phys(bh->b_data), + bh->b_size, + (pending_req->operation==READ)); + + if ( atomic_dec_and_test(&pending_req->pendcnt) ) + { + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + spin_lock(&pend_prod_lock); + pending_ring[MASK_PEND_IDX(pending_prod)] = + pending_req - pending_reqs; + pending_prod++; + spin_unlock(&pend_prod_lock); + maybe_trigger_io_schedule(); + } +} + + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ + +void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_t *blkif = dev_id; + add_to_blkdev_list_tail(blkif); + maybe_trigger_io_schedule(); +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ + +static int lock_buffer(blkif_t *blkif, + unsigned long buffer, + unsigned short size, + int writeable_buffer) +{ + unsigned long pfn; + + for ( pfn = buffer >> PAGE_SHIFT; + pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); + pfn++ ) + { + } + + return 1; + + fail: + while ( pfn-- > (buffer >> PAGE_SHIFT) ) + { + } + return 0; +} + +static void unlock_buffer(unsigned long buffer, + unsigned short size, + int writeable_buffer) +{ + unsigned long pfn; + + for ( pfn = buffer >> PAGE_SHIFT; + pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); + pfn++ ) + { + } +} + +static int do_block_io_op(blkif_t *blkif, int max_to_do) +{ + blk_ring_t *blk_ring = blkif->blk_ring_base; + blk_ring_req_entry_t *req; + BLK_RING_IDX i; + int more_to_do = 0; + + /* Take items off the comms ring, taking care not to overflow. 
*/ + for ( i = blkif->blk_req_cons; + (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != + BLK_RING_SIZE); + i++ ) + { + if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) + { + more_to_do = 1; + break; + } + + req = &blk_ring->ring[MASK_BLK_IDX(i)].req; + switch ( req->operation ) + { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + dispatch_rw_block_io(blkif, req); + break; + + default: + DPRINTK("error: unknown block io operation [%d]\n", + blk_ring->ring[i].req.operation); + make_response(blkif, blk_ring->ring[i].req.id, + blk_ring->ring[i].req.operation, 1); + break; + } + } + + blkif->blk_req_cons = i; + return more_to_do; +} + +static void dispatch_rw_block_io(blkif_t *blkif, + blk_ring_req_entry_t *req) +{ + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + struct buffer_head *bh; + int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ; + unsigned short nr_sects; + unsigned long buffer; + int i, tot_sects; + pending_req_t *pending_req; + + /* We map virtual scatter/gather segments to physical segments. */ + int new_segs, nr_psegs = 0; + phys_seg_t phys_seg[MAX_BLK_SEGS * 2]; + + /* Check that number of segments is sane. */ + if ( unlikely(req->nr_segments == 0) || + unlikely(req->nr_segments > MAX_BLK_SEGS) ) + { + DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); + goto bad_descriptor; + } + + /* + * Check each address/size pair is sane, and convert into a + * physical device and block offset. Note that if the offset and size + * crosses a virtual extent boundary, we may end up with more + * physical scatter/gather segments than virtual segments. 
+ */ + for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects ) + { + buffer = req->buffer_and_sects[i] & ~0x1FF; + nr_sects = req->buffer_and_sects[i] & 0x1FF; + + if ( unlikely(nr_sects == 0) ) + { + DPRINTK("zero-sized data request\n"); + goto bad_descriptor; + } + + phys_seg[nr_psegs].dev = req->device; + phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects; + phys_seg[nr_psegs].buffer = buffer; + phys_seg[nr_psegs].nr_sects = nr_sects; + + /* Translate the request into the relevant 'physical device' */ + new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation); + if ( new_segs < 0 ) + { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + req->sector_number + tot_sects, + req->sector_number + tot_sects + nr_sects, + req->device); + goto bad_descriptor; + } + + nr_psegs += new_segs; + ASSERT(nr_psegs <= MAX_BLK_SEGS*2); + } + + for ( i = 0; i < nr_psegs; i++ ) + { + if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, + phys_seg[i].nr_sects << 9, + operation==READ)) ) + { + DPRINTK("invalid buffer\n"); + while ( i-- > 0 ) + unlock_buffer(phys_seg[i].buffer, + phys_seg[i].nr_sects << 9, + operation==READ); + goto bad_descriptor; + } + } + + pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]]; + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = operation; + pending_req->status = 0; + atomic_set(&pending_req->pendcnt, nr_psegs); + + blkif_get(blkif); + + /* Now we pass each segment down to the real blkdev layer. 
*/ + for ( i = 0; i < nr_psegs; i++ ) + { + bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); + if ( unlikely(bh == NULL) ) + panic("bh is null\n"); + memset(bh, 0, sizeof (struct buffer_head)); + + bh->b_size = phys_seg[i].nr_sects << 9; + bh->b_dev = phys_seg[i].dev; + bh->b_rsector = (unsigned long)phys_seg[i].sector_number; + + /* SMH: we store a 'pseudo-virtual' bogus address in b_data since + later code will undo this transformation (i.e. +-PAGE_OFFSET). */ + bh->b_data = phys_to_virt(phys_seg[i].buffer); + + /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */ + bh->b_page = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; + bh->b_end_io = end_block_io_op; + bh->b_private = pending_req; + + bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock); + if ( operation == WRITE ) + bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate); + + atomic_set(&bh->b_count, 1); + + /* Dispatch a single request. We'll flush it to disc later. */ + submit_bh(operation, bh); + } + + return; + + bad_descriptor: + make_response(blkif, req->id, req->operation, 1); +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + + +static void make_response(blkif_t *blkif, unsigned long id, + unsigned short op, unsigned long st) +{ + blk_ring_resp_entry_t *resp; + + /* Place on the response ring for the relevant domain. */ + spin_lock(&blkif->blk_ring_lock); + resp = &blkif->blk_ring_base-> + ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp; + resp->id = id; + resp->operation = op; + resp->status = st; + wmb(); + blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod; + spin_unlock(&blkif->blk_ring_lock); + + /* Kick the relevant domain. 
*/ + notify_via_evtchn(blkif->evtchn); +} + +static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs) +{ +#if 0 + unsigned long flags; + struct task_struct *p; + blk_ring_t *blk_ring; + int i; + + printk("Dumping block queue stats: nr_pending = %d" + " (prod=0x%08x,cons=0x%08x)\n", + NR_PENDING_REQS, pending_prod, pending_cons); + + read_lock_irqsave(&tasklist_lock, flags); + for_each_domain ( p ) + { + printk("Domain: %llu\n", blkif->domain); + blk_ring = blkif->blk_ring_base; + printk(" req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/" + "0x%08x on_list=%d\n", + blk_ring->req_prod, blkif->blk_req_cons, + blk_ring->resp_prod, blkif->blk_resp_prod, + __on_blkdev_list(p)); + } + read_unlock_irqrestore(&tasklist_lock, flags); + + for ( i = 0; i < MAX_PENDING_REQS; i++ ) + { + printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n", + i, pending_reqs[i].domain, pending_reqs[i].id, + atomic_read(&pending_reqs[i].pendcnt), + pending_reqs[i].operation, pending_reqs[i].status); + } +#endif +} + +void unlink_blkdev_info(blkif_t *blkif) +{ + unsigned long flags; + + spin_lock_irqsave(&io_schedule_list_lock, flags); + if ( __on_blkdev_list(blkif) ) + { + list_del(&blkif->blkdev_list); + blkif->blkdev_list.next = (void *)0xdeadbeef; + blkif_put(blkif); + } + spin_unlock_irqrestore(&io_schedule_list_lock, flags); +} + +static int __init init_module(void) +{ + int i; + + pending_cons = 0; + pending_prod = MAX_PENDING_REQS; + memset(pending_reqs, 0, sizeof(pending_reqs)); + for ( i = 0; i < MAX_PENDING_REQS; i++ ) + pending_ring[i] = i; + + for ( i = 0; i < NR_CPUS; i++ ) + completed_bhs[i] = NULL; + + spin_lock_init(&io_schedule_list_lock); + INIT_LIST_HEAD(&io_schedule_list); + + if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, + SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 ) + printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); + + buffer_head_cachep = kmem_cache_create( + "buffer_head_cache", sizeof(struct 
buffer_head), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + + return 0; +} + +static void cleanup_module(void) +{ +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c new file mode 100644 index 0000000000..bd6c40125c --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c @@ -0,0 +1,578 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/vbd.c + * + * Routines for managing virtual block devices (VBDs). + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + */ + +#include "common.h" + +void vbd_create(blkif_vbd_create_t *create) +{ + vbd_t *vbd; + rb_node_t **rb_p, *rb_parent = NULL; + blkif_t *blkif; + blkif_vdev_t vdevice = create->vdevice; + + blkif = blkif_find_by_handle(create->domid, create->blkif_handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", + create->domid, create->blkif_handle); + create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + return; + } + + spin_lock(&blkif->vbd_lock); + + rb_p = &blkif->vbd_rb.rb_node; + while ( *rb_p != NULL ) + { + rb_parent = *rb_p; + vbd = rb_entry(rb_parent, vbd_t, rb); + if ( vdevice < vbd->vdevice ) + { + rb_p = &rb_parent->rb_left; + } + else if ( vdevice > vbd->vdevice ) + { + rb_p = &rb_parent->rb_right; + } + else + { + DPRINTK("vbd_create attempted for already existing vbd\n"); + create->status = BLKIF_STATUS_VBD_EXISTS; + goto out; + } + } + + if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) + { + DPRINTK("vbd_create: out of memory\n"); + create->status = BLKIF_STATUS_OUT_OF_MEMORY; + goto out; + } + + vbd->vdevice = vdevice; + vbd->mode = create->mode; + vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; + vbd->extents = NULL; + + rb_link_node(&vbd->rb, rb_parent, rb_p); + rb_insert_color(&vbd->rb, 
&blkif->vbd_rb); + + create->status = BLKIF_STATUS_OKAY; + + out: + spin_unlock(&blkif->vbd_lock); + blkif_put(blkif); +} + + +/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */ +void vbd_grow(blkif_vbd_grow_t *grow) +{ + blkif_t *blkif; + xen_extent_le_t **px, *x; + vbd_t *vbd = NULL; + rb_node_t *rb; + blkif_vdev_t vdevice = grow->vdevice; + + blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", + grow->domid, grow->blkif_handle); + grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + return; + } + + spin_lock(&blkif->vbd_lock); + + rb = blkif->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + break; + } + + if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) + { + DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n"); + grow->status = BLKIF_STATUS_VBD_NOT_FOUND; + goto out; + } + + if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) ) + { + DPRINTK("vbd_grow: out of memory\n"); + grow->status = BLKIF_STATUS_OUT_OF_MEMORY; + goto out; + } + + x->extent.device = grow->extent.device; + x->extent.sector_start = grow->extent.sector_start; + x->extent.sector_length = grow->extent.sector_length; + x->next = (xen_extent_le_t *)NULL; + + for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) + continue; + + *px = x; + + grow->status = BLKIF_STATUS_OKAY; + + out: + spin_unlock(&blkif->vbd_lock); + blkif_put(blkif); +} + + +void vbd_shrink(blkif_vbd_shrink_t *shrink) +{ + blkif_t *blkif; + xen_extent_le_t **px, *x; + vbd_t *vbd = NULL; + rb_node_t *rb; + blkif_vdev_t vdevice = shrink->vdevice; + + blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("vbd_shrink attempted for 
non-existent blkif (%llu,%u)\n", + shrink->domid, shrink->blkif_handle); + shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + return; + } + + spin_lock(&blkif->vbd_lock); + + rb = blkif->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + break; + } + + if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) + { + shrink->status = BLKIF_STATUS_VBD_NOT_FOUND; + goto out; + } + + if ( unlikely(vbd->extents == NULL) ) + { + shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND; + goto out; + } + + /* Find the last extent. We now know that there is at least one. */ + for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next ) + continue; + + x = *px; + *px = x->next; + kfree(x); + + shrink->status = BLKIF_STATUS_OKAY; + + out: + spin_unlock(&blkif->vbd_lock); + blkif_put(blkif); +} + + +void vbd_destroy(blkif_vbd_destroy_t *destroy) +{ + blkif_t *blkif; + vbd_t *vbd; + rb_node_t *rb; + xen_extent_le_t *x, *t; + blkif_vdev_t vdevice = destroy->vdevice; + + blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,%u)\n", + destroy->domid, destroy->blkif_handle); + destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; + return; + } + + spin_lock(&blkif->vbd_lock); + + rb = blkif->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + goto found; + } + + destroy->status = BLKIF_STATUS_VBD_NOT_FOUND; + goto out; + + found: + rb_erase(rb, &blkif->vbd_rb); + x = vbd->extents; + kfree(vbd); + + while ( x != NULL ) + { + t = x->next; + kfree(x); + x = t; + } + destroy->status = BLKIF_STATUS_OKAY; /* report success, as create/grow/shrink do */ + out: + spin_unlock(&blkif->vbd_lock); + blkif_put(blkif); +} + + +void destroy_all_vbds(blkif_t *blkif) +{ + vbd_t *vbd; 
+ rb_node_t *rb; + xen_extent_le_t *x, *t; + + spin_lock(&blkif->vbd_lock); + + while ( (rb = blkif->vbd_rb.rb_node) != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + + rb_erase(rb, &blkif->vbd_rb); + x = vbd->extents; + kfree(vbd); + + while ( x != NULL ) + { + t = x->next; + kfree(x); + x = t; + } + } + + spin_unlock(&blkif->vbd_lock); +} + + +static int vbd_probe_single(xen_disk_info_t *xdi, + vbd_t *vbd, + struct task_struct *p) +{ + xen_extent_le_t *x; + xen_disk_t cur_disk; + + if ( xdi->count == xdi->max ) + { + DPRINTK("vbd_probe_devices: out of space for probe.\n"); + return -ENOMEM; + } + + cur_disk.device = vbd->vdevice; + cur_disk.info = vbd->type; + if ( !VBD_CAN_WRITE(vbd) ) + cur_disk.info |= XD_FLAG_RO; + cur_disk.capacity = 0ULL; + for ( x = vbd->extents; x != NULL; x = x->next ) + cur_disk.capacity += x->extent.nr_sectors; + cur_disk.domain = p->domain; + + /* Now copy into relevant part of user-space buffer */ + if( copy_to_user(&xdi->disks[xdi->count], + &cur_disk, + sizeof(xen_disk_t)) ) + { + DPRINTK("vbd_probe_devices: copy_to_user failed\n"); + return -EFAULT; + } + + xdi->count++; + + return 0; +} + + +static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) +{ + int rc = 0; + rb_node_t *rb; + + spin_lock(&p->vbd_lock); + + if ( (rb = p->vbd_rb.rb_node) == NULL ) + goto out; + + new_subtree: + /* STEP 1. Find least node (it'll be left-most). */ + while ( rb->rb_left != NULL ) + rb = rb->rb_left; + + for ( ; ; ) + { + /* STEP 2. Dealt with left subtree. Now process current node. */ + if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 ) + goto out; + + /* STEP 3. Process right subtree, if any. */ + if ( rb->rb_right != NULL ) + { + rb = rb->rb_right; + goto new_subtree; + } + + /* STEP 4. Done both subtrees. Head back through ancesstors. */ + for ( ; ; ) + { + /* We're done when we get back to the root node. 
*/ + if ( rb->rb_parent == NULL ) + goto out; + /* If we are left of parent, then parent is next to process. */ + if ( rb->rb_parent->rb_left == rb ) + break; + /* If we are right of parent, then we climb to grandparent. */ + rb = rb->rb_parent; + } + + rb = rb->rb_parent; + } + + out: + spin_unlock(&p->vbd_lock); + return rc; +} + + +/* + * Return information about the VBDs available for a given domain, or for all + * domains; in the general case the 'domain' argument will be 0 which means + * "information about the caller"; otherwise the 'domain' argument will + * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of + * these cases require the caller to be privileged. + */ +long vbd_probe(vbd_probe_t *probe) +{ + struct task_struct *p = NULL; + unsigned long flags; + long ret = 0; + + if ( probe->domain != 0 ) + { + /* We can only probe for ourselves (unless we're privileged). */ + if( (probe->domain != current->domain) && !IS_PRIV(current) ) + return -EPERM; + + if ( (probe->domain != VBD_PROBE_ALL) && + ((p = find_domain_by_id(probe->domain)) == NULL) ) + { + DPRINTK("vbd_probe attempted for non-existent domain %llu\n", + probe->domain); + return -EINVAL; + } + } + else + { + /* Default is to probe for ourselves. 
*/ + p = current; + get_task_struct(p); /* to mirror final put_task_struct */ + } + + if ( probe->domain == VBD_PROBE_ALL ) + { + read_lock_irqsave(&tasklist_lock, flags); + for_each_domain ( p ) + { + if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) + { + read_unlock_irqrestore(&tasklist_lock, flags); + goto out; + } + } + read_unlock_irqrestore(&tasklist_lock, flags); + } + else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) + goto out; + + out: + if ( ret != 0 ) + DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); + if ( p != NULL ) + put_task_struct(p); + return ret; +} + + +long vbd_info(vbd_info_t *info) +{ + struct task_struct *p; + xen_extent_le_t *x; + xen_extent_t *extents; + vbd_t *vbd = NULL; + rb_node_t *rb; + long ret = 0; + + if ( (info->domain != current->domain) && !IS_PRIV(current) ) + return -EPERM; + + if ( (p = find_domain_by_id(info->domain)) == NULL ) + { + DPRINTK("vbd_info attempted for non-existent domain %llu\n", + info->domain); + return -EINVAL; + } + + spin_lock(&p->vbd_lock); + + rb = p->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( info->vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( info->vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + break; + } + + if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) ) + { + DPRINTK("vbd_info attempted on non-existent VBD.\n"); + ret = -EINVAL; + goto out; + } + + info->mode = vbd->mode; + info->nextents = 0; + + extents = info->extents; + for ( x = vbd->extents; x != NULL; x = x->next ) + { + if ( info->nextents == info->maxextents ) + break; + if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) ) + { + DPRINTK("vbd_info: copy_to_user failed\n"); + ret = -EFAULT; + goto out; + } + extents++; + info->nextents++; + } + + out: + spin_unlock(&p->vbd_lock); + put_task_struct(p); + return ret; +} + + +int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) +{ + xen_extent_le_t *x; 
+ vbd_t *vbd; + rb_node_t *rb; + xen_sector_t sec_off; + unsigned long nr_secs; + + spin_lock(&p->vbd_lock); + + rb = p->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( pseg->dev < vbd->vdevice ) + rb = rb->rb_left; + else if ( pseg->dev > vbd->vdevice ) + rb = rb->rb_right; + else + goto found; + } + + DPRINTK("vbd_translate; domain %llu attempted to access " + "non-existent VBD.\n", p->domain); + + spin_unlock(&p->vbd_lock); + return -ENODEV; + + found: + + if ( ((operation == READ) && !VBD_CAN_READ(vbd)) || + ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) ) + { + spin_unlock(&p->vbd_lock); + return -EACCES; + } + + /* + * Now iterate through the list of xen_extents, working out which should + * be used to perform the translation. + */ + sec_off = pseg->sector_number; + nr_secs = pseg->nr_sects; + for ( x = vbd->extents; x != NULL; x = x->next ) + { + if ( sec_off < x->extent.nr_sectors ) + { + pseg->dev = x->extent.device; + pseg->sector_number = x->extent.start_sector + sec_off; + if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) ) + goto overrun; + spin_unlock(&p->vbd_lock); + return 1; + } + sec_off -= x->extent.nr_sectors; + } + + DPRINTK("vbd_translate: end of vbd.\n"); + spin_unlock(&p->vbd_lock); + return -EACCES; + + /* + * Here we deal with overrun onto the following extent. We don't deal with + * overrun of more than one boundary since each request is restricted to + * 2^9 512-byte sectors, so it should be trivial for control software to + * ensure that extents are large enough to prevent excessive overrun. + */ + overrun: + + /* Adjust length of first chunk to run to end of first extent. */ + pseg[0].nr_sects = x->extent.nr_sectors - sec_off; + + /* Set second chunk buffer and length to start where first chunk ended. */ + pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9); + pseg[1].nr_sects = nr_secs - pseg[0].nr_sects; + + /* Now move to the next extent. Check it exists and is long enough! 
*/ + if ( unlikely((x = x->next) == NULL) || + unlikely(x->extent.nr_sectors < pseg[1].nr_sects) ) + { + DPRINTK("vbd_translate: multiple overruns or end of vbd.\n"); + spin_unlock(&p->vbd_lock); + return -EACCES; + } + + /* Store the real device and start sector for the second chunk. */ + pseg[1].dev = x->extent.device; + pseg[1].sector_number = x->extent.start_sector; + + spin_unlock(&p->vbd_lock); + return 2; +} diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h new file mode 100644 index 0000000000..f6e8a4d5c8 --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h @@ -0,0 +1,109 @@ +/****************************************************************************** + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __SHARED_BLKIF_H__ +#define __SHARED_BLKIF_H__ + +#define blkif_vdev_t u16 +#define blkif_sector_t u64 + +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +#define BLKIF_OP_PROBE 2 + +/* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */ +#define BLKIF_RING_SIZE 64 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_REQUEST_MAX_SEGMENTS 11 + +typedef struct { + unsigned char operation; /* BLKIF_OP_??? */ + unsigned char nr_segments; /* number of segments (<= MAX_BLK_SEGS) */ + blkif_vdev_t device; /* only for read/write requests */ + unsigned long id; /* private guest value, echoed in resp */ + xen_sector_t sector_number; /* start sector idx on disk (r/w only) */ + /* Least 9 bits is 'nr_sects'. High 23 bits is the address. 
*/ + unsigned long buffer_and_sects[MAX_BLK_SEGS]; +} blkif_request_t; + +typedef struct { + unsigned long id; /* copied from request */ + unsigned char operation; /* copied from request */ + int status; /* BLKIF_RSP_??? */ +} blkif_response_t; + +#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */ +#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */ + +/* + * We use a special capitalised type name because it is _essential_ that all + * arithmetic on indexes is done on an integer type of the correct size. + */ +typedef unsigned int BLKIF_RING_IDX; + +/* + * Ring indexes are 'free running'. That is, they are not stored modulo the + * size of the ring buffer. The following macro converts a free-running counter + * into a value that can directly index a ring-buffer array. + */ +#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1)) + +typedef struct { + BLKIF_RING_IDX req_prod; /* Request producer. Updated by guest OS. */ + BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen. */ + union { + blkif_request_t req; + blkif_response_t resp; + } ring[BLKIF_RING_SIZE]; +} blkif_ring_t; + + +/* + * BLKIF_OP_PROBE: + * The request format for a probe request is constrained as follows: + * @operation == BLKIF_OP_PROBE + * @nr_segments == size of probe buffer in pages + * @device == unused (zero) + * @id == any value (echoed in response message) + * @sector_num == unused (zero) + * @buffer_and_sects == list of page-aligned, page-sized buffers. + * (i.e., nr_sects == 8). + * + * The response is a list of vdisk_t elements copied into the out-of-band + * probe buffer. On success the response status field contains the number + * of vdisk_t elements. + */ + +/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. 
*/ +#define VDISK_TYPE_FLOPPY 0x00 +#define VDISK_TYPE_TAPE 0x01 +#define VDISK_TYPE_CDROM 0x05 +#define VDISK_TYPE_OPTICAL 0x07 +#define VDISK_TYPE_DISK 0x20 + +#define VDISK_TYPE_MASK 0x3F +#define VDISK_TYPE(_x) ((_x) & VDISK_TYPE_MASK) + +/* The top two bits of the type field encode various flags. */ +#define VDISK_FLAG_RO 0x40 +#define VDISK_FLAG_VIRT 0x80 +#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO) +#define VDISK_VIRTUAL(_x) ((_x) & VDISK_FLAG_VIRT) + +typedef struct { + blkif_vdev_t device; /* Device number (opaque 16 bit value). */ + unsigned short info; /* Device type and flags (VDISK_*). */ + xen_sector_t capacity; /* Size in terms of 512-byte sectors. */ +} vdisk_t; + +#endif /* __SHARED_BLKIF_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile new file mode 100644 index 0000000000..35986ca54a --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := block.o vbd.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c new file mode 100644 index 0000000000..d00dd98f7b --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c @@ -0,0 +1,625 @@ +/****************************************************************************** + * block.c + * + * Xenolinux virtual block-device driver. + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + */ + +#include "block.h" +#include +#include +#include +#include +#include + +#include + +typedef unsigned char byte; /* from linux/ide.h */ + +#define STATE_ACTIVE 0 +#define STATE_SUSPENDED 1 +#define STATE_CLOSED 2 +static unsigned int state = STATE_SUSPENDED; + +/* Dynamically-mapped IRQs. 
*/ +static int xlblk_response_irq, xlblk_update_irq; + +static blk_ring_t *blk_ring; +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ +static BLK_RING_IDX req_prod; /* Private request producer. */ + +/* We plug the I/O ring if the driver is suspended or if the ring is full. */ +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ + (state != STATE_ACTIVE)) + + +/* + * Request queues with outstanding work, but ring is currently full. + * We need no special lock here, as we always access this with the + * io_request_lock held. We only need a small maximum list. + */ +#define MAX_PENDING 8 +static request_queue_t *pending_queues[MAX_PENDING]; +static int nr_pending; + +static kdev_t sg_dev; +static int sg_operation = -1; +static unsigned long sg_next_sect; +#define DISABLE_SCATTERGATHER() (sg_operation = -1) + +static inline void signal_requests_to_xen(void) +{ + block_io_op_t op; + + DISABLE_SCATTERGATHER(); + blk_ring->req_prod = req_prod; + + op.cmd = BLOCK_IO_OP_SIGNAL; + HYPERVISOR_block_io_op(&op); + return; +} + + +/* + * xlblk_update_int/update-vbds_task - handle VBD update events from Xen + * + * Schedule a task for keventd to run, which will update the VBDs and perform + * the corresponding updates to our view of VBD state, so the XenoLinux will + * respond to changes / additions / deletions to the set of VBDs automatically. 
+ */ +static struct tq_struct update_tq; +static void update_vbds_task(void *unused) +{ + xlvbd_update_vbds(); +} +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); +} + + +int xen_block_open(struct inode *inode, struct file *filep) +{ + short xldev = inode->i_rdev; + struct gendisk *gd = get_gendisk(xldev); + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + short minor = MINOR(xldev); + + if ( gd->part[minor].nr_sects == 0 ) + { + /* + * Device either doesn't exist, or has zero capacity; we use a few + * cheesy heuristics to return the relevant error code + */ + if ( (gd->sizes[minor >> gd->minor_shift] != 0) || + ((minor & (gd->max_p - 1)) != 0) ) + { + /* + * We have a real device, but no such partition, or we just have a + * partition number so guess this is the problem. + */ + return -ENXIO; /* no such device or address */ + } + else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) + { + /* This is a removable device => assume that media is missing. */ + return -ENOMEDIUM; /* media not present (this is a guess) */ + } + else + { + /* Just go for the general 'no such device' error. */ + return -ENODEV; /* no such device */ + } + } + + /* Update of usage count is protected by per-device semaphore. */ + disk->usage++; + + return 0; +} + + +int xen_block_release(struct inode *inode, struct file *filep) +{ + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + + /* + * When usage drops to zero it may allow more VBD updates to occur. + * Update of usage count is protected by a per-device semaphore. 
+ */ + if ( --disk->usage == 0 ) + { + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); + } + + return 0; +} + + +int xen_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument) +{ + kdev_t dev = inode->i_rdev; + struct hd_geometry *geo = (struct hd_geometry *)argument; + struct gendisk *gd; + struct hd_struct *part; + int i; + + /* NB. No need to check permissions. That is done for us. */ + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", + command, (long) argument, dev); + + gd = get_gendisk(dev); + part = &gd->part[MINOR(dev)]; + + switch ( command ) + { + case BLKGETSIZE: + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); + return put_user(part->nr_sects, (unsigned long *) argument); + + case BLKGETSIZE64: + DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, + (u64)part->nr_sects * 512); + return put_user((u64)part->nr_sects * 512, (u64 *) argument); + + case BLKRRPART: /* re-read partition table */ + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); + return xen_block_revalidate(dev); + + case BLKSSZGET: /* sector size is stored through the user pointer, not returned */ + return put_user(hardsect_size[MAJOR(dev)][MINOR(dev)], (int *) argument); + + case BLKBSZGET: /* get block size */ + DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); + break; + + case BLKBSZSET: /* set block size */ + DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); + break; + + case BLKRASET: /* set read-ahead */ + DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); + break; + + case BLKRAGET: /* get read-ahead */ + DPRINTK_IOCTL(" BLKRAGET: %x\n", BLKRAGET); + break; + + case HDIO_GETGEO: + /* note: these values are complete garbage */ + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); + if (!argument) return -EINVAL; + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; + return 0; + + case 
HDIO_GETGEO_BIG: + /* note: these values are complete garbage; the argument is a struct hd_big_geometry */ + DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); + if (!argument) return -EINVAL; + if (put_user(0x00, &((struct hd_big_geometry *)argument)->start)) return -EFAULT; + if (put_user(0xff, &((struct hd_big_geometry *)argument)->heads)) return -EFAULT; + if (put_user(0x3f, &((struct hd_big_geometry *)argument)->sectors)) return -EFAULT; + if (put_user(0x106, &((struct hd_big_geometry *)argument)->cylinders)) return -EFAULT; + return 0; + + case CDROMMULTISESSION: + DPRINTK("FIXME: support multisession CDs later\n"); + for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) + if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; + return 0; + + case SCSI_IOCTL_GET_BUS_NUMBER: + DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev"); + return -ENOSYS; + + default: + printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command); + return -ENOSYS; + } + + return 0; +} + +/* check media change: should probably do something here in some cases :-) */ +int xen_block_check(kdev_t dev) +{ + DPRINTK("xen_block_check\n"); + return 0; +} + +int xen_block_revalidate(kdev_t dev) +{ + struct block_device *bd; + struct gendisk *gd; + xl_disk_t *disk; + unsigned long capacity; + int i, rc = 0; + + if ( (bd = bdget(dev)) == NULL ) + return -EINVAL; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((gd = get_gendisk(dev)) == NULL) || + ((disk = xldev_to_xldisk(dev)) == NULL) || + ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) + { + rc = -EINVAL; + goto out; + } + + if ( disk->usage > 1 ) + { + rc = -EBUSY; + goto out; + } + + /* Only reread partition table if VBDs aren't mapped to partitions. 
*/ + if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) + { + for ( i = gd->max_p - 1; i >= 0; i-- ) + { + invalidate_device(dev+i, 1); + gd->part[MINOR(dev+i)].start_sect = 0; + gd->part[MINOR(dev+i)].nr_sects = 0; + gd->sizes[MINOR(dev+i)] = 0; + } + + grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); + } + + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + + +/* + * hypervisor_request + * + * request block io + * + * id: for guest use only. + * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + */ +static int hypervisor_request(unsigned long id, + int operation, + char * buffer, + unsigned long sector_number, + unsigned short nr_sectors, + kdev_t device) +{ + unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); + struct gendisk *gd; + blk_ring_req_entry_t *req; + struct buffer_head *bh; + + if ( unlikely(nr_sectors >= (1<<9)) ) + BUG(); + if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) + BUG(); + + if ( unlikely(state == STATE_CLOSED) ) + return 1; + + switch ( operation ) + { + + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + gd = get_gendisk(device); + + /* + * Update the sector_number we'll pass down as appropriate; note that + * we could sanity check that resulting sector will be in this + * partition, but this will happen in xen anyhow. + */ + sector_number += gd->part[MINOR(device)].start_sect; + + /* + * If this unit doesn't consist of virtual (i.e., Xen-specified) + * partitions then we clear the partn bits from the device number. 
+ */ + if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & + GENHD_FL_VIRT_PARTNS) ) + device &= ~(gd->max_p - 1); + + if ( (sg_operation == operation) && + (sg_dev == device) && + (sg_next_sect == sector_number) ) + { + req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; + bh = (struct buffer_head *)id; + bh->b_reqnext = (struct buffer_head *)req->id; + req->id = id; + req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; + if ( ++req->nr_segments < MAX_BLK_SEGS ) + sg_next_sect += nr_sectors; + else + DISABLE_SCATTERGATHER(); + return 0; + } + else if ( RING_PLUGGED ) + { + return 1; + } + else + { + sg_operation = operation; + sg_dev = device; + sg_next_sect = sector_number + nr_sectors; + } + break; + + default: + panic("unknown op %d\n", operation); + } + + /* Fill out a communications ring structure. */ + req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; + req->id = id; + req->operation = operation; + req->sector_number = (xen_sector_t)sector_number; + req->device = device; + req->nr_segments = 1; + req->buffer_and_sects[0] = buffer_ma | nr_sectors; + req_prod++; + + return 0; +} + + +/* + * do_xlblk_request + * read a block; request is in a request queue + */ +void do_xlblk_request(request_queue_t *rq) +{ + struct request *req; + struct buffer_head *bh, *next_bh; + int rw, nsect, full, queued = 0; + + DPRINTK("xlblk.c::do_xlblk_request\n"); + + while ( !rq->plugged && !list_empty(&rq->queue_head)) + { + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) + goto out; + + DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", + req, req->cmd, req->sector, + req->current_nr_sectors, req->nr_sectors, req->bh); + + rw = req->cmd; + if ( rw == READA ) + rw = READ; + if ( unlikely((rw != READ) && (rw != WRITE)) ) + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); + + req->errors = 0; + + bh = req->bh; + while ( bh != NULL ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + + full = hypervisor_request( + 
(unsigned long)bh, + (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, + bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); + + if ( full ) + { + bh->b_reqnext = next_bh; + pending_queues[nr_pending++] = rq; + if ( unlikely(nr_pending >= MAX_PENDING) ) + BUG(); + goto out; + } + + queued++; + + /* Dequeue the buffer head from the request. */ + nsect = bh->b_size >> 9; + bh = req->bh = next_bh; + + if ( bh != NULL ) + { + /* There's another buffer head to do. Update the request. */ + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + req->current_nr_sectors = bh->b_size >> 9; + req->buffer = bh->b_data; + } + else + { + /* That was the last buffer head. Finalise the request. */ + if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) + BUG(); + blkdev_dequeue_request(req); + end_that_request_last(req); + } + } + } + + out: + if ( queued != 0 ) signal_requests_to_xen(); +} + + +static void kick_pending_request_queues(void) +{ + /* We kick pending request queues if the ring is reasonably empty. */ + if ( (nr_pending != 0) && + ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) + { + /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ + while ( (nr_pending != 0) && !RING_PLUGGED ) + do_xlblk_request(pending_queues[--nr_pending]); + } +} + + +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + BLK_RING_IDX i; + unsigned long flags; + struct buffer_head *bh, *next_bh; + + if ( unlikely(state == STATE_CLOSED) ) + return; + + spin_lock_irqsave(&io_request_lock, flags); + + for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) + { + blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; + switch ( bret->operation ) + { + case XEN_BLOCK_READ: + case XEN_BLOCK_WRITE: + if ( unlikely(bret->status != 0) ) + DPRINTK("Bad return from blkdev data request: %lx\n", + bret->status); + for ( bh = (struct buffer_head *)bret->id; + bh != NULL; + bh = next_bh ) + { + next_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, !bret->status); + } + break; + + default: + BUG(); + } + } + + resp_cons = i; + + kick_pending_request_queues(); + + spin_unlock_irqrestore(&io_request_lock, flags); +} + + +static void reset_xlblk_interface(void) +{ + block_io_op_t op; + + nr_pending = 0; + + op.cmd = BLOCK_IO_OP_RESET; + if ( HYPERVISOR_block_io_op(&op) != 0 ) + printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n"); + + op.cmd = BLOCK_IO_OP_RING_ADDRESS; + (void)HYPERVISOR_block_io_op(&op); + + set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT); + blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; + + wmb(); + state = STATE_ACTIVE; +} + + +int __init xlblk_init(void) +{ + int error; + + reset_xlblk_interface(); + + xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV); + xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD); + + error = request_irq(xlblk_response_irq, xlblk_response_int, + SA_SAMPLE_RANDOM, "blkdev", NULL); + if ( error ) + { + printk(KERN_ALERT "Could not allocate receive interrupt\n"); + goto fail; + } + + error = request_irq(xlblk_update_irq, xlblk_update_int, + 
0, "blkdev", NULL); + + if ( error ) + { + printk(KERN_ALERT "Could not allocate block update interrupt\n"); + goto fail; + } + + (void)xlvbd_init(); + + return 0; + + fail: + return error; +} + + +static void __exit xlblk_cleanup(void) +{ + xlvbd_cleanup(); + free_irq(xlblk_response_irq, NULL); + free_irq(xlblk_update_irq, NULL); + unbind_virq_from_irq(VIRQ_BLKDEV); + unbind_virq_from_irq(VIRQ_VBD_UPD); +} + + +#ifdef MODULE +module_init(xlblk_init); +module_exit(xlblk_cleanup); +#endif + + +void blkdev_suspend(void) +{ + state = STATE_SUSPENDED; + wmb(); + + while ( resp_cons != blk_ring->req_prod ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + wmb(); + state = STATE_CLOSED; + wmb(); + + clear_fixmap(FIX_BLKRING_BASE); +} + + +void blkdev_resume(void) +{ + reset_xlblk_interface(); + spin_lock_irq(&io_request_lock); + kick_pending_request_queues(); + spin_unlock_irq(&io_request_lock); +} diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h new file mode 100644 index 0000000000..e41e03970e --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h @@ -0,0 +1,82 @@ +/****************************************************************************** + * block.h + * + * Shared definitions between all levels of XenoLinux Virtual block devices. + */ + +#ifndef __XEN_DRIVERS_BLOCK_H__ +#define __XEN_DRIVERS_BLOCK_H__ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if 0 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#if 0 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#else +#define DPRINTK_IOCTL(_f, _a...) ((void)0) +#endif + +/* Private gendisk->flags[] values. 
*/ +#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ + +/* + * We have one of these per vbd, whether ide, scsi or 'other'. + * They hang in an array off the gendisk structure. We may end up putting + * all kinds of interesting stuff here :-) + */ +typedef struct xl_disk { + int usage; +} xl_disk_t; + +extern int xen_control_msg(int operration, char *buffer, int size); +extern int xen_block_open(struct inode *inode, struct file *filep); +extern int xen_block_release(struct inode *inode, struct file *filep); +extern int xen_block_ioctl(struct inode *inode, struct file *filep, + unsigned command, unsigned long argument); +extern int xen_block_check(kdev_t dev); +extern int xen_block_revalidate(kdev_t dev); +extern void do_xlblk_request (request_queue_t *rq); + +extern void xlvbd_update_vbds(void); + +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) +{ + struct gendisk *gd = get_gendisk(xldev); + + if ( gd == NULL ) + return NULL; + + return (xl_disk_t *)gd->real_devices + + (MINOR(xldev) >> gd->minor_shift); +} + + +/* Virtual block-device subsystem. */ +extern int xlvbd_init(void); +extern void xlvbd_cleanup(void); + +#endif /* __XEN_DRIVERS_BLOCK_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c new file mode 100644 index 0000000000..e08b976c56 --- /dev/null +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c @@ -0,0 +1,561 @@ +/****************************************************************************** + * vbd.c + * + * Xenolinux virtual block-device driver (xvd). + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + */ + +#include "block.h" +#include + +/* + * For convenience we distinguish between ide, scsi and 'other' (i.e. 
+ * potentially combinations of the two) in the naming scheme and in a few
+ * other places (like default readahead, etc).
+ */
+#define XLIDE_MAJOR_NAME  "hd"
+#define XLSCSI_MAJOR_NAME "sd"
+#define XLVBD_MAJOR_NAME  "xvd"
+
+#define XLIDE_DEVS_PER_MAJOR   2
+#define XLSCSI_DEVS_PER_MAJOR 16
+#define XLVBD_DEVS_PER_MAJOR  16
+
+#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
+#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
+
+#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
+#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
+
+#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
+#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
+
+/* The below are for the generic drivers/block/ll_rw_block.c code. */
+static int xlide_blksize_size[256];
+static int xlide_hardsect_size[256];
+static int xlide_max_sectors[256];
+static int xlscsi_blksize_size[256];
+static int xlscsi_hardsect_size[256];
+static int xlscsi_max_sectors[256];
+static int xlvbd_blksize_size[256];
+static int xlvbd_hardsect_size[256];
+static int xlvbd_max_sectors[256];
+
+/* Information from Xen about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static xen_disk_t *vbd_info;
+
+static struct block_device_operations xlvbd_block_fops =
+{
+    open:               xen_block_open,
+    release:            xen_block_release,
+    ioctl:              xen_block_ioctl,
+    check_media_change: xen_block_check,
+    revalidate:         xen_block_revalidate,
+};
+
+/*
+ * xlvbd_get_vbd_info - ask Xen which VBDs this domain can see
+ * @disk_info: array with room for MAX_VBDS entries, filled in by the probe
+ *
+ * Returns the number of entries reported by the probe, or -1 if the
+ * hypercall fails.
+ */
+static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
+{
+    int error;
+    block_io_op_t op;
+
+    /* Probe for disk information. */
+    memset(&op, 0, sizeof(op));
+    op.cmd = BLOCK_IO_OP_VBD_PROBE;
+    op.u.probe_params.domain    = 0;
+    op.u.probe_params.xdi.max   = MAX_VBDS;
+    op.u.probe_params.xdi.disks = disk_info;
+    op.u.probe_params.xdi.count = 0;
+
+    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
+    {
+        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
+        return -1;
+    }
+
+    return op.u.probe_params.xdi.count;
+}
+
+/* Release a gendisk built by xlvbd_alloc_gendisk(), including its arrays. */
+static void xlvbd_free_gendisk(struct gendisk *gd)
+{
+    if ( gd == NULL )
+        return;
+
+    kfree(gd->sizes);
+    kfree(gd->part);
+    kfree(gd->real_devices);
+    kfree(gd->de_arr);
+    kfree(gd->flags);
+    kfree(gd);
+}
+
+/*
+ * Build a zeroed gendisk (and its per-minor / per-unit arrays) for @major.
+ * Returns NULL if any allocation fails; nothing is leaked in that case.
+ */
+static struct gendisk *xlvbd_alloc_gendisk(int major, char *major_name,
+                                           int max_part, int is_ide,
+                                           int is_scsi)
+{
+    struct gendisk *gd;
+    int nr_minors;
+
+    if ( (gd = kmalloc(sizeof(*gd), GFP_KERNEL)) == NULL )
+        return NULL;
+    memset(gd, 0, sizeof(*gd));
+
+    gd->major      = major;
+    gd->major_name = major_name;
+    gd->max_p      = max_part;
+
+    if ( is_ide )
+    {
+        gd->minor_shift = XLIDE_PARTN_SHIFT;
+        gd->nr_real     = XLIDE_DEVS_PER_MAJOR;
+    }
+    else if ( is_scsi )
+    {
+        gd->minor_shift = XLSCSI_PARTN_SHIFT;
+        gd->nr_real     = XLSCSI_DEVS_PER_MAJOR;
+    }
+    else
+    {
+        gd->minor_shift = XLVBD_PARTN_SHIFT;
+        gd->nr_real     = XLVBD_DEVS_PER_MAJOR;
+    }
+
+    /*
+    ** The sizes[] and part[] arrays hold the sizes and other
+    ** information about every partition with this 'major' (i.e.
+    ** every disk sharing the 8 bit prefix * max partns per disk)
+    */
+    nr_minors = max_part * gd->nr_real;
+
+    gd->sizes        = kmalloc(nr_minors * sizeof(int), GFP_KERNEL);
+    gd->part         = kmalloc(nr_minors * sizeof(struct hd_struct),
+                               GFP_KERNEL);
+    gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), GFP_KERNEL);
+    gd->de_arr       = kmalloc(gd->nr_real * sizeof(*gd->de_arr), GFP_KERNEL);
+    gd->flags        = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
+
+    if ( (gd->sizes == NULL) || (gd->part == NULL) ||
+         (gd->real_devices == NULL) || (gd->de_arr == NULL) ||
+         (gd->flags == NULL) )
+    {
+        xlvbd_free_gendisk(gd);
+        return NULL;
+    }
+
+    memset(gd->sizes, 0, nr_minors * sizeof(int));
+    memset(gd->part, 0, nr_minors * sizeof(struct hd_struct));
+    memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
+    memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
+    memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags));
+
+    gd->next = NULL;
+    gd->fops = &xlvbd_block_fops;
+
+    return gd;
+}
+
+/*
+ * xlvbd_init_device - initialise a VBD device
+ * @xd:                a xen_disk_t describing the VBD
+ *
+ * Takes a xen_disk_t * that describes a VBD the domain has access to.
+ * Performs appropriate initialisation and registration of the device.
+ * Returns 0 on success and non-zero on failure (device busy, major number
+ * unavailable, or out of memory).
+ *
+ * Care needs to be taken when making re-entrant calls to ensure that
+ * corruption does not occur.  Also, devices that are in use should not have
+ * their details updated.  This is the caller's responsibility.
+ */
+static int xlvbd_init_device(xen_disk_t *xd)
+{
+    int device = xd->device;
+    int major  = MAJOR(device);
+    int minor  = MINOR(device);
+    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
+    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
+    char *major_name;
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk;
+    int i, rc = 0, max_part, partno;
+    unsigned long capacity;
+
+    unsigned char buf[64];
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    /*
+     * Update of partition info, and check of usage count, is protected
+     * by the per-block-device semaphore.
+     */
+    down(&bd->bd_sem);
+
+    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
+    {
+        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+
+    if ( is_ide ) {
+
+        major_name = XLIDE_MAJOR_NAME;
+        max_part   = XLIDE_MAX_PART;
+
+    } else if ( is_scsi ) {
+
+        major_name = XLSCSI_MAJOR_NAME;
+        max_part   = XLSCSI_MAX_PART;
+
+    } else if (XD_VIRTUAL(xd->info)) {
+
+        major_name = XLVBD_MAJOR_NAME;
+        max_part   = XLVBD_MAX_PART;
+
+    } else {
+
+        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
+        printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n",
+               major, minor);
+        is_scsi    = 1;
+        major_name = "cciss";
+        max_part   = XLSCSI_MAX_PART;
+
+    }
+
+    partno = minor & (max_part - 1);
+
+    if ( (gd = get_gendisk(device)) == NULL )
+    {
+        /*
+         * Build the gendisk before touching any global block-layer state,
+         * so that running out of memory needs no unregistration.
+         */
+        gd = xlvbd_alloc_gendisk(major, major_name, max_part,
+                                 is_ide, is_scsi);
+        if ( gd == NULL )
+        {
+            printk(KERN_ALERT "XL VBD: out of memory for major %d\n", major);
+            rc = -1;
+            goto out;
+        }
+
+        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
+        if ( rc < 0 )
+        {
+            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
+            xlvbd_free_gendisk(gd);
+            goto out;
+        }
+
+        if ( is_ide )
+        {
+            blksize_size[major]  = xlide_blksize_size;
+            hardsect_size[major] = xlide_hardsect_size;
+            max_sectors[major]   = xlide_max_sectors;
+            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
+        }
+        else if ( is_scsi )
+        {
+            blksize_size[major]  = xlscsi_blksize_size;
+            hardsect_size[major] = xlscsi_hardsect_size;
+            max_sectors[major]   = xlscsi_max_sectors;
+            read_ahead[major]    = 0; /* XXX 8; -- guessing */
+        }
+        else
+        {
+            blksize_size[major]  = xlvbd_blksize_size;
+            hardsect_size[major] = xlvbd_hardsect_size;
+            max_sectors[major]   = xlvbd_max_sectors;
+            read_ahead[major]    = 8;
+        }
+
+        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
+
+        /*
+         * Turn off barking 'headactive' mode. We dequeue buffer heads as
+         * soon as we pass them down to Xen.
+         */
+        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
+
+        add_gendisk(gd);
+
+        blk_size[major] = gd->sizes;
+    }
+
+    if ( XD_READONLY(xd->info) )
+        set_device_ro(device, 1);
+
+    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
+
+    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
+    capacity = (unsigned long)xd->capacity;
+
+    if ( partno != 0 )
+    {
+        /*
+         * If this was previously set up as a real disc we will have set
+         * up partition-table information. Virtual partitions override
+         * 'real' partitions, and the two cannot coexist on a device.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(max_part-1)] != 0) )
+        {
+            /*
+             * Any non-zero sub-partition entries must be cleaned out before
+             * installing 'virtual' partition entries. The two types cannot
+             * coexist, and virtual partitions are favoured.
+             */
+            kdev_t dev = device & ~(max_part-1);
+            for ( i = max_part - 1; i > 0; i-- )
+            {
+                invalidate_device(dev+i, 1);
+                gd->part[MINOR(dev+i)].start_sect = 0;
+                gd->part[MINOR(dev+i)].nr_sects   = 0;
+                gd->sizes[MINOR(dev+i)]           = 0;
+            }
+            printk(KERN_ALERT
+                   "Virtual partitions found for /dev/%s - ignoring any "
+                   "real partition information we may have found.\n",
+                   disk_name(gd, MINOR(device), buf));
+        }
+
+        /* Need to skankily setup 'partition' information */
+        gd->part[minor].start_sect = 0;
+        gd->part[minor].nr_sects   = capacity;
+        gd->sizes[minor]           = capacity;
+
+        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+    }
+    else
+    {
+        gd->part[minor].nr_sects = capacity;
+        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
+
+        /* Some final fix-ups depending on the device type */
+        switch ( XD_TYPE(xd->info) )
+        {
+        case XD_TYPE_CDROM:
+        case XD_TYPE_FLOPPY:
+        case XD_TYPE_TAPE:
+            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE;
+            printk(KERN_ALERT
+                   "Skipping partition check on %s /dev/%s\n",
+                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" :
+                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" :
+                    "floppy"), disk_name(gd, MINOR(device), buf));
+            break;
+
+        case XD_TYPE_DISK:
+            /* Only check partitions on real discs (not virtual!). */
+            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            {
+                printk(KERN_ALERT
+                       "Skipping partition check on virtual /dev/%s\n",
+                       disk_name(gd, MINOR(device), buf));
+                break;
+            }
+            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
+            break;
+
+        default:
+            printk(KERN_ALERT "XenoLinux: unknown device type %d\n",
+                   XD_TYPE(xd->info));
+            break;
+        }
+    }
+
+ out:
+    up(&bd->bd_sem);
+    bdput(bd);
+    return rc;
+}
+
+
+/*
+ * xlvbd_remove_device - remove a device node if possible
+ * @device:       numeric device ID
+ *
+ * Updates the gendisk structure and invalidates devices.
+ * Returns 0 on success, -1 if the device is in use or cannot be looked up.
+ *
+ * This is OK for now but in future, should perhaps consider where this should
+ * deallocate gendisks / unregister devices.
+ */
+static int xlvbd_remove_device(int device)
+{
+    int i, rc = 0, minor = MINOR(device);
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk = NULL;
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    /*
+     * Update of partition info, and check of usage count, is protected
+     * by the per-block-device semaphore.
+     */
+    down(&bd->bd_sem);
+
+    if ( ((gd = get_gendisk(device)) == NULL) ||
+         ((disk = xldev_to_xldisk(device)) == NULL) )
+        BUG();
+
+    if ( disk->usage != 0 )
+    {
+        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+
+    if ( (minor & (gd->max_p-1)) != 0 )
+    {
+        /* 1: The VBD is mapped to a partition rather than a whole unit. */
+        invalidate_device(device, 1);
+        gd->part[minor].start_sect = 0;
+        gd->part[minor].nr_sects   = 0;
+        gd->sizes[minor]           = 0;
+
+        /* Clear the consists-of-virtual-partitions flag if possible. */
+        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+        for ( i = 1; i < gd->max_p; i++ )
+            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
+                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+
+        /*
+         * If all virtual partitions are now gone, and a 'whole unit' VBD is
+         * present, then we can try to grok the unit's real partition table.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
+             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
+        {
+            register_disk(gd,
+                          device&~(gd->max_p-1),
+                          gd->max_p,
+                          &xlvbd_block_fops,
+                          gd->part[minor&~(gd->max_p-1)].nr_sects);
+        }
+    }
+    else
+    {
+        /*
+         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
+         * NB. The partition entries are only cleared if there are no VBDs
+         * mapped to individual partitions on this unit.
+         */
+        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
+        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
+        while ( i >= 0 )
+        {
+            invalidate_device(device+i, 1);
+            gd->part[minor+i].start_sect = 0;
+            gd->part[minor+i].nr_sects   = 0;
+            gd->sizes[minor+i]           = 0;
+            i--;
+        }
+    }
+
+ out:
+    up(&bd->bd_sem);
+    bdput(bd);
+    return rc;
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and updates driver state.
+ * The VBDs need to be updated in this way when the domain is initialised
+ * and also each time we receive an XLBLK_UPDATE event.
+ *
+ * If the probe fails or memory cannot be obtained, the existing VBD list
+ * is left untouched.
+ */
+void xlvbd_update_vbds(void)
+{
+    int i, j, k, old_nr, new_nr;
+    xen_disk_t *old_info, *new_info, *merged_info;
+
+    old_info = vbd_info;
+    old_nr   = nr_vbds;
+
+    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+    if ( new_info == NULL )
+        return;
+
+    /* The comparison must sit inside unlikely(): test the count itself. */
+    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
+    {
+        kfree(new_info);
+        return;
+    }
+
+    /*
+     * Final list maximum size is old list + new list. This occurs only when
+     * old list and new list do not overlap at all, and we cannot yet destroy
+     * VBDs in the old list because the usage counts are busy.
+     */
+    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
+    if ( merged_info == NULL )
+    {
+        kfree(new_info);
+        return;
+    }
+
+    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
+    i = j = k = 0;
+
+    while ( (i < old_nr) && (j < new_nr) )
+    {
+        if ( old_info[i].device < new_info[j].device )
+        {
+            /* Only in the old list: keep it only if removal fails. */
+            if ( xlvbd_remove_device(old_info[i].device) != 0 )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+            i++;
+        }
+        else if ( old_info[i].device > new_info[j].device )
+        {
+            /* Only in the new list: keep it if initialisation succeeds. */
+            if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+            j++;
+        }
+        else
+        {
+            /* In both lists: re-create only if capacity or info changed. */
+            if ( ((old_info[i].capacity == new_info[j].capacity) &&
+                  (old_info[i].info == new_info[j].info)) ||
+                 (xlvbd_remove_device(old_info[i].device) != 0) )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+            else if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+            i++; j++;
+        }
+    }
+
+    for ( ; i < old_nr; i++ )
+    {
+        if ( xlvbd_remove_device(old_info[i].device) != 0 )
+            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+    }
+
+    for ( ; j < new_nr; j++ )
+    {
+        if ( xlvbd_init_device(&new_info[j]) == 0 )
+            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+    }
+
+    vbd_info = merged_info;
+    nr_vbds  = k;
+
+    kfree(old_info);
+    kfree(new_info);
+}
+
+
+/*
+ * Set up all the linux device goop for the virtual block devices (vbd's) that
+ * xen tells us about. Note that although from xen's pov VBDs are addressed
+ * simply an opaque 16-bit device number, the domain creation tools
+ * conventionally allocate these numbers to correspond to those used by 'real'
+ * linux -- this is just for convenience as it means e.g. that the same
+ * /etc/fstab can be used when booting with or without xen.
+ *
+ * Always returns 0; a failed probe or allocation simply leaves the VBD
+ * list empty.
+ */
+int __init xlvbd_init(void)
+{
+    int i;
+
+    /*
+     * If compiled as a module, we don't support unloading yet. We therefore
+     * permanently increment the reference count to disallow it.
+     */
+    SET_MODULE_OWNER(&xlvbd_block_fops);
+    MOD_INC_USE_COUNT;
+
+    /* Initialize the global arrays. */
+    for ( i = 0; i < 256; i++ )
+    {
+        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
+        xlide_blksize_size[i]  = 1024;
+        xlide_hardsect_size[i] = 512;
+        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
+
+        /* from the generic scsi disk code (drivers/scsi/sd.c) */
+        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
+        xlscsi_hardsect_size[i] = 512;
+        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
+
+        /* we don't really know what to set these to since it depends */
+        xlvbd_blksize_size[i]  = 512;
+        xlvbd_hardsect_size[i] = 512;
+        xlvbd_max_sectors[i]   = 128;
+    }
+
+    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+    if ( vbd_info == NULL )
+    {
+        printk(KERN_ALERT "XL VBD: out of memory probing disks\n");
+        nr_vbds = 0;
+        return 0;
+    }
+
+    nr_vbds = xlvbd_get_vbd_info(vbd_info);
+
+    if ( nr_vbds < 0 )
+    {
+        kfree(vbd_info);
+        vbd_info = NULL;
+        nr_vbds  = 0;
+    }
+    else
+    {
+        for ( i = 0; i < nr_vbds; i++ )
+            xlvbd_init_device(&vbd_info[i]);
+    }
+
+    return 0;
+}
+
+
+#ifdef MODULE
+module_init(xlvbd_init);
+#endif
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile
new file mode 100644
index 0000000000..20c8192d3d
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile
@@ -0,0 +1,10 @@
+
+O_TARGET := drv.o
+
+subdir-y += frontend
+obj-y    += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile
new file mode 100644
index 0000000000..032d02d7cc
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
new file mode 100644
index 0000000000..dac8624879
--- /dev/null
+++
b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
@@ -0,0 +1,26 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/main.c
+ *
+ * Back-end of the driver for virtual network interfaces. At present this
+ * file is only a placeholder: module initialisation and cleanup do nothing.
+ * NOTE(review): the earlier wording described a 'unified' block-device
+ * interface and looks copied from the blkif back end -- confirm intent. A
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/netif/frontend
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include
+#include
+
+/* Module entry point: nothing to set up yet, always reports success. */
+static int __init init_module(void)
+{
+    return 0;
+}
+
+/* Module exit point: nothing to tear down yet. */
+static void cleanup_module(void)
+{
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile
new file mode 100644
index 0000000000..032d02d7cc
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
new file mode 100644
index 0000000000..f2c36f1f88
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
@@ -0,0 +1,565 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/frontend/main.c
+ *
+ * Virtual network driver for XenoLinux.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
+
+static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void cleanup_module(void);
+
+/* Dynamically-mapped IRQs. */
+static int network_irq, debug_irq;
+
+/* Every net_private known to this driver, walked by the interrupt handlers. */
+static struct list_head dev_list;
+
+/* Per-interface driver state; one instance hangs off each net_device. */
+struct net_private
+{
+    struct list_head list;          /* Linkage on dev_list. */
+    struct net_device *dev;         /* Back-pointer to the owning device. */
+
+    struct net_device_stats stats;  /* Returned by network_get_stats(). */
+    NET_RING_IDX rx_resp_cons, tx_resp_cons; /* Private response consumers. */
+    unsigned int net_ring_fixmap_idx, tx_full; /* Fixmap slot; tx-ring-full. */
+    net_ring_t *net_ring;           /* Ring page mapped via the fixmap. */
+    net_idx_t  *net_idx;            /* Ring indexes within the shared info. */
+    spinlock_t tx_lock;             /* Held around tx ring updates and gc. */
+    unsigned int idx; /* Domain-specific index of this VIF. */
+
+    unsigned int rx_bufs_to_notify; /* Rx buffers posted but not yet pushed. */
+
+#define STATE_ACTIVE    0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED    2
+    unsigned int state;             /* One of the STATE_* values above. */
+
+    /*
+     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+     * array is an index into a chain of free entries.
+     */
+    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
+    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
+};
+
+/* Access macros for acquiring and freeing slots in {tx,rx}_skbs[].
*/
+#define ADD_ID_TO_FREELIST(_list, _id)                 \
+    do {                                               \
+        (_list)[(_id)] = (_list)[0];                   \
+        (_list)[0]     = (void *)(unsigned long)(_id); \
+    } while ( 0 )
+#define GET_ID_FROM_FREELIST(_list)                    \
+ ({ unsigned long _id = (unsigned long)(_list)[0];     \
+    (_list)[0]  = (_list)[_id];                        \
+    (unsigned short)_id; })
+
+
+/* Dump the ring indexes of one interface (skipped while it is closed). */
+static void _dbg_network_int(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+
+    if ( np->state == STATE_CLOSED )
+        return;
+
+    printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
+           " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
+           " tx_event=0x%08x, state=%d\n",
+           np->tx_full, np->tx_resp_cons,
+           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
+           np->net_idx->tx_event,
+           test_bit(__LINK_STATE_XOFF, &dev->state));
+    printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
+           " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
+           np->rx_resp_cons, np->net_idx->rx_req_prod,
+           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
+}
+
+
+/* Debug interrupt handler: dump the state of every known interface. */
+static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
+{
+    struct list_head *ent;
+    struct net_private *np;
+    list_for_each ( ent, &dev_list )
+    {
+        np = list_entry(ent, struct net_private, list);
+        _dbg_network_int(np->dev);
+    }
+}
+
+
+/*
+ * Bring an interface up: reset the rings in Xen, fetch the VIF's MAC address
+ * and ring frame, map the ring, reset all private state and post receive
+ * buffers. Returns 0 on success or the hypercall's error code.
+ */
+static int network_open(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    netop_t netop;
+    int i, ret;
+
+    netop.cmd = NETOP_RESET_RINGS;
+    netop.vif = np->idx;
+    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
+    {
+        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
+        return ret;
+    }
+
+    netop.cmd = NETOP_GET_VIF_INFO;
+    netop.vif = np->idx;
+    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
+    {
+        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
+        return ret;
+    }
+
+    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
+
+    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx,
+               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
+    np->net_ring = (net_ring_t *)fix_to_virt(
+        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
+
+    np->rx_bufs_to_notify = 0;
+    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+    memset(&np->stats, 0, sizeof(np->stats));
+    spin_lock_init(&np->tx_lock);
+    memset(np->net_ring, 0, sizeof(*np->net_ring));
+    memset(np->net_idx, 0, sizeof(*np->net_idx));
+
+    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
+        np->tx_skbs[i] = (void *)(unsigned long)(i+1);
+    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
+        np->rx_skbs[i] = (void *)(unsigned long)(i+1);
+
+    wmb();
+    np->state = STATE_ACTIVE;
+
+    network_alloc_rx_buffers(dev);
+
+    netif_start_queue(dev);
+
+    MOD_INC_USE_COUNT;
+
+    return 0;
+}
+
+
+/*
+ * Reap transmit responses: free each completed skb, return its slot to the
+ * free chain, re-arm tx_event, and wake the queue if it had been stopped
+ * for lack of ring space. Callers in this file hold np->tx_lock.
+ */
+static void network_tx_buf_gc(struct net_device *dev)
+{
+    NET_RING_IDX i, prod;
+    unsigned short id;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    tx_entry_t *tx_ring = np->net_ring->tx_ring;
+
+    do {
+        prod = np->net_idx->tx_resp_prod;
+
+        for ( i = np->tx_resp_cons; i != prod; i++ )
+        {
+            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
+            skb = np->tx_skbs[id];
+            ADD_ID_TO_FREELIST(np->tx_skbs, id);
+            dev_kfree_skb_any(skb);
+        }
+
+        np->tx_resp_cons = prod;
+
+        /*
+         * Set a new event, then check for race with update of tx_cons. Note
+         * that it is essential to schedule a callback, no matter how few
+         * buffers are pending. Even if there is space in the transmit ring,
+         * higher layers may be blocked because too much data is outstanding:
+         * in such cases notification from Xen is likely to be the only kick
+         * that we'll get.
+         */
+        np->net_idx->tx_event =
+            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
+        mb();
+    }
+    while ( prod != np->net_idx->tx_resp_prod );
+
+    if ( np->tx_full &&
+         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
+    {
+        np->tx_full = 0;
+        if ( np->state == STATE_ACTIVE )
+            netif_wake_queue(dev);
+    }
+}
+
+
+/* Walk the kernel page tables and return the PTE that maps @addr. */
+static inline pte_t *get_ppte(void *addr)
+{
+    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+    pgd = pgd_offset_k(   (unsigned long)addr);
+    pmd = pmd_offset(pgd, (unsigned long)addr);
+    pte = pte_offset(pmd, (unsigned long)addr);
+    return pte;
+}
+
+
+/*
+ * Top up the receive ring with freshly allocated skbs until it is full or
+ * allocation fails, then publish the new producer index and, once enough
+ * buffers have accumulated, notify Xen.
+ */
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+    unsigned short id;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    netop_t netop;
+    NET_RING_IDX i = np->net_idx->rx_req_prod;
+
+    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) ||
+         unlikely(np->state != STATE_ACTIVE) )
+        return;
+
+    do {
+        skb = dev_alloc_skb(RX_BUF_SIZE);
+        if ( unlikely(skb == NULL) )
+            break;
+
+        skb->dev = dev;
+
+        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
+            panic("alloc_skb needs to provide us page-aligned buffers.");
+
+        id = GET_ID_FROM_FREELIST(np->rx_skbs);
+        np->rx_skbs[id] = skb;
+
+        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
+        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr =
+            virt_to_machine(get_ppte(skb->head));
+
+        np->rx_bufs_to_notify++;
+    }
+    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
+
+    /*
+     * We may have allocated buffers which have entries outstanding in the page
+     * update queue -- make sure we flush those first!
+     */
+    flush_page_update_queue();
+
+    np->net_idx->rx_req_prod = i;
+    np->net_idx->rx_event    = np->rx_resp_cons + 1;
+
+    /* Batch Xen notifications. */
+    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
+    {
+        netop.cmd = NETOP_PUSH_BUFFERS;
+        netop.vif = np->idx;
+        (void)HYPERVISOR_net_io_op(&netop);
+        np->rx_bufs_to_notify = 0;
+    }
+}
+
+
+/*
+ * Queue one packet on the transmit ring.
+ * Returns 0 once the packet is queued, -ENOBUFS if the ring was already
+ * marked full, and 1 if a bounce buffer could not be allocated.
+ */
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+    unsigned short id;
+    struct net_private *np = (struct net_private *)dev->priv;
+    tx_req_entry_t *tx;
+    netop_t netop;
+    NET_RING_IDX i;
+
+    if ( unlikely(np->tx_full) )
+    {
+        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
+        netif_stop_queue(dev);
+        return -ENOBUFS;
+    }
+
+    /* Data that would reach the end of its page is copied to a fresh skb. */
+    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
+                  PAGE_SIZE) )
+    {
+        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
+        if ( unlikely(new_skb == NULL) )
+            return 1;
+        skb_put(new_skb, skb->len);
+        memcpy(new_skb->data, skb->data, skb->len);
+        dev_kfree_skb(skb);
+        skb = new_skb;
+    }
+
+    spin_lock_irq(&np->tx_lock);
+
+    i = np->net_idx->tx_req_prod;
+
+    id = GET_ID_FROM_FREELIST(np->tx_skbs);
+    np->tx_skbs[id] = skb;
+
+    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
+
+    tx->id   = id;
+    tx->addr = phys_to_machine(virt_to_phys(skb->data));
+    tx->size = skb->len;
+
+    /*
+     * Account for the packet while we still own it: once the request is
+     * published, network_tx_buf_gc() may free the skb as soon as a response
+     * arrives, so skb->len must not be read after that point.
+     */
+    np->stats.tx_bytes += skb->len;
+    np->stats.tx_packets++;
+
+    wmb();
+    np->net_idx->tx_req_prod = i + 1;
+
+    network_tx_buf_gc(dev);
+
+    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
+    {
+        np->tx_full = 1;
+        netif_stop_queue(dev);
+    }
+
+    spin_unlock_irq(&np->tx_lock);
+
+    /* Only notify Xen if there are no outstanding responses. */
+    mb();
+    if ( np->net_idx->tx_resp_prod == i )
+    {
+        netop.cmd = NETOP_PUSH_BUFFERS;
+        netop.vif = np->idx;
+        (void)HYPERVISOR_net_io_op(&netop);
+    }
+
+    return 0;
+}
+
+
+/*
+ * Per-interface interrupt work: reap transmit responses, hand every received
+ * packet to the network stack, and refill the receive ring.
+ */
+static inline void _network_interrupt(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    unsigned long flags;
+    struct sk_buff *skb;
+    rx_resp_entry_t *rx;
+    NET_RING_IDX i;
+
+    if ( unlikely(np->state == STATE_CLOSED) )
+        return;
+
+    spin_lock_irqsave(&np->tx_lock, flags);
+    network_tx_buf_gc(dev);
+    spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ again:
+    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
+    {
+        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
+
+        skb = np->rx_skbs[rx->id];
+        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+
+        if ( unlikely(rx->status != RING_STATUS_OK) )
+        {
+            /* Gate this error. We get a (valid) slew of them on suspend. */
+            if ( np->state == STATE_ACTIVE )
+                printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
+            dev_kfree_skb_any(skb);
+            continue;
+        }
+
+        /*
+         * Set up shinfo -- from alloc_skb This was particularly nasty:  the
+         * shared info is hidden at the back of the data area (presumably so it
+         * can be shared), but on page flip it gets very spunked.
+         */
+        atomic_set(&(skb_shinfo(skb)->dataref), 1);
+        skb_shinfo(skb)->nr_frags = 0;
+        skb_shinfo(skb)->frag_list = NULL;
+
+        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
+            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
+
+        skb->data = skb->tail = skb->head + rx->offset;
+        skb_put(skb, rx->size);
+        skb->protocol = eth_type_trans(skb, dev);
+
+        np->stats.rx_packets++;
+
+        np->stats.rx_bytes += rx->size;
+        netif_rx(skb);
+        dev->last_rx = jiffies;
+    }
+
+    np->rx_resp_cons = i;
+
+    network_alloc_rx_buffers(dev);
+
+    /* Deal with hypervisor racing our resetting of rx_event. */
+    mb();
+    if ( np->net_idx->rx_resp_prod != i )
+        goto again;
+}
+
+
+/* Network interrupt handler: service every interface on dev_list. */
+static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
+{
+    struct list_head *ent;
+    struct net_private *np;
+    list_for_each ( ent, &dev_list )
+    {
+        np = list_entry(ent, struct net_private, list);
+        _network_interrupt(np->dev);
+    }
+}
+
+
+/*
+ * Take an interface down: stop the queue, ask Xen to flush its buffers, wait
+ * until every outstanding request has been answered, then unmap the ring.
+ */
+static int network_close(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    netop_t netop;
+
+    np->state = STATE_SUSPENDED;
+    wmb();
+
+    netif_stop_queue(np->dev);
+
+    netop.cmd = NETOP_FLUSH_BUFFERS;
+    netop.vif = np->idx;
+    (void)HYPERVISOR_net_io_op(&netop);
+
+    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
+            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
+    {
+        barrier();
+        current->state = TASK_INTERRUPTIBLE;
+        schedule_timeout(1);
+    }
+
+    wmb();
+    np->state = STATE_CLOSED;
+    wmb();
+
+    /* Now no longer safe to take interrupts for this device. */
+    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+
+    MOD_DEC_USE_COUNT;
+
+    return 0;
+}
+
+
+/* Return the interface's statistics block. */
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+    struct net_private *np = (struct net_private *)dev->priv;
+    return &np->stats;
+}
+
+
+/*
+ * Module entry point. The whole probing sequence is currently compiled out
+ * with #if 0, so this simply returns 0.
+ * NOTE(review): INIT_LIST_HEAD(&dev_list) and the IRQ binding are inside the
+ * disabled region, yet cleanup_module() still walks dev_list and frees both
+ * IRQs -- confirm before re-enabling module unload.
+ */
+static int __init init_module(void)
+{
+#if 0
+    int i, fixmap_idx=-1, err;
+    struct net_device *dev;
+    struct net_private *np;
+    netop_t netop;
+
+    INIT_LIST_HEAD(&dev_list);
+
+    network_irq = bind_virq_to_irq(VIRQ_NET);
+    debug_irq   = bind_virq_to_irq(VIRQ_DEBUG);
+
+    err = request_irq(network_irq, network_interrupt,
+                      SA_SAMPLE_RANDOM, "network", NULL);
+    if ( err )
+    {
+        printk(KERN_WARNING "Could not allocate network interrupt\n");
+        goto fail;
+    }
+
+    err = request_irq(debug_irq, dbg_network_int,
+                      SA_SHIRQ, "net_dbg", &dbg_network_int);
+    if ( err )
+        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
+
+    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+    {
+        /* If the VIF is invalid then the query hypercall will fail. */
+        netop.cmd = NETOP_GET_VIF_INFO;
+        netop.vif = i;
+        if ( HYPERVISOR_net_io_op(&netop) != 0 )
+            continue;
+
+        /* We actually only support up to 4 vifs right now. */
+        if ( ++fixmap_idx == 4 )
+            break;
+
+        dev = alloc_etherdev(sizeof(struct net_private));
+        if ( dev == NULL )
+        {
+            err = -ENOMEM;
+            goto fail;
+        }
+
+        np = dev->priv;
+        np->state               = STATE_CLOSED;
+        np->net_ring_fixmap_idx = fixmap_idx;
+        np->idx                 = i;
+
+        SET_MODULE_OWNER(dev);
+        dev->open            = network_open;
+        dev->hard_start_xmit = network_start_xmit;
+        dev->stop            = network_close;
+        dev->get_stats       = network_get_stats;
+
+        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
+
+        if ( (err = register_netdev(dev)) != 0 )
+        {
+            kfree(dev);
+            goto fail;
+        }
+
+        np->dev = dev;
+        list_add(&np->list, &dev_list);
+    }
+
+    return 0;
+
+ fail:
+    cleanup_module();
+    return err;
+#endif
+    return 0;
+}
+
+
+/* Unregister and free every interface, then release both virtual IRQs. */
+static void cleanup_module(void)
+{
+    struct net_private *np;
+    struct net_device *dev;
+
+    while ( !list_empty(&dev_list) )
+    {
+        np = list_entry(dev_list.next, struct net_private, list);
+        list_del(&np->list);
+        dev = np->dev;
+        unregister_netdev(dev);
+        kfree(dev);
+    }
+
+    free_irq(network_irq, NULL);
+    free_irq(debug_irq, NULL);
+
+    unbind_virq_from_irq(VIRQ_NET);
+    unbind_virq_from_irq(VIRQ_DEBUG);
+}
+
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
deleted file mode 100644
index 20c8192d3d..0000000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-
-O_TARGET := drv.o
-
-subdir-y += frontend
-obj-y    += frontend/drv.o
-
-subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
-obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
-
-include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
deleted file mode 100644
index 4c8c17367c..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := main.o control.o interface.o vbd.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h deleted file mode 100644 index 849ad1a1c7..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h +++ /dev/null @@ -1,98 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/vblkif/backend/common.h - */ - -#ifndef __VBLKIF__BACKEND__COMMON_H__ -#define __VBLKIF__BACKEND__COMMON_H__ - -#include -#include -#include -#include -#include -#include -#include -#include "../vblkif.h" - -#ifndef NDEBUG -#define ASSERT(_p) \ - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ - __LINE__, __FILE__); *(int*)0=0; } -#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) -#else -#define ASSERT(_p) ((void)0) -#define DPRINTK(_f, _a...) ((void)0) -#endif - -typedef struct blkif_st { - /* Unique identifier for this interface. */ - domid_t domid; - unsigned int handle; - /* Physical parameters of the comms window. */ - unsigned long shmem_frame; - unsigned int evtchn; - int irq; - /* Comms information. */ - blk_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ - BLK_RING_IDX blk_req_cons; /* Request consumer. */ - BLK_RING_IDX blk_resp_prod; /* Private version of response producer. */ - /* VBDs attached to this interface. */ - rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */ - spinlock_t vbd_lock; /* Protects VBD mapping. */ - /* Private fields. 
*/ - struct blkif_st *hash_next; - struct list_head blkdev_list; - spinlock_t blk_ring_lock; -} blkif_t; - -void blkif_create(blkif_create_t *create); -void blkif_destroy(blkif_destroy_t *destroy); -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); -void blkif_get(blkif_t *blkif); -void blkif_put(blkif_t *blkif); - -/* An entry in a list of xen_extents. */ -typedef struct _blkif_extent_le { - blkif_extent_t extent; /* an individual extent */ - struct _blkif_extent_le *next; /* and a pointer to the next */ -} blkif_extent_le_t; - -typedef struct _vbd { - blkif_vdev_t vdevice; /* what the domain refers to this vbd as */ - unsigned char mode; /* VBD_MODE_{R,W} */ - unsigned char type; /* XD_TYPE_xxx */ - blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */ - rb_node_t rb; /* for linking into R-B tree lookup struct */ -} vbd_t; - -long vbd_create(blkif_vbd_create_t *create_params); -long vbd_grow(blkif_vbd_grow_t *grow_params); -long vbd_shrink(blkif_vbd_shrink_t *shrink_params); -long vbd_destroy(blkif_vbd_destroy_t *delete_params); - -void destroy_all_vbds(struct task_struct *p); - -typedef struct { - blkif_t *blkif; - unsigned long id; - atomic_t pendcnt; - unsigned short operation; - unsigned short status; -} pending_req_t; - -/* Describes a [partial] disk extent (part of a block io request) */ -typedef struct { - unsigned short dev; - unsigned short nr_sects; - unsigned long buffer; - xen_sector_t sector_number; -} phys_seg_t; - -int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); - -int vblkif_be_controller_init(void); - -void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs); - -#endif /* __VBLKIF__BACKEND__COMMON_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c deleted file mode 100644 index 19c4d5b57c..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c +++ 
/dev/null @@ -1,61 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/vblkif/backend/control.c - * - * Routines for interfacing with the control plane. - * - * Copyright (c) 2004, Keir Fraser - */ - -#include "common.h" - -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->subtype ) - { - case CMSG_BLKIF_BE_CREATE: - if ( msg->length != sizeof(blkif_create_t) ) - goto parse_error; - blkif_create((blkif_create_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_DESTROY: - if ( msg->length != sizeof(blkif_destroy_t) ) - goto parse_error; - blkif_destroy((blkif_destroy_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_VBD_CREATE: - if ( msg->length != sizeof(blkif_vbd_create_t) ) - goto parse_error; - vbd_create((blkif_vbd_create_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_VBD_DESTROY: - if ( msg->length != sizeof(blkif_vbd_destroy_t) ) - goto parse_error; - vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_VBD_GROW: - if ( msg->length != sizeof(blkif_vbd_grow_t) ) - goto parse_error; - vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_VBD_SHRINK: - if ( msg->length != sizeof(blkif_vbd_shrink_t) ) - goto parse_error; - vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]); - break; - default: - goto parse_error; - } - - ctrl_if_send_response(msg); - return; - - parse_error: - msg->length = 0; - ctrl_if_send_response(msg); -} - -int blkif_ctrlif_init(void) -{ - (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx); - return 0; -} diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c deleted file mode 100644 index 30e5c16409..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c +++ /dev/null @@ -1,96 +0,0 @@ -/****************************************************************************** - * 
arch/xen/drivers/vblkif/backend/interface.c - * - * Block-device interface management. - * - * Copyright (c) 2004, Keir Fraser - */ - -#include "common.h" - -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d,_h) \ - (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1)) - -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) -{ - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif != NULL) && - (blkif->domid != domid) && - (blkif->handle != handle) ) - blkif = blkif->hash_next; - return blkif; -} - -void blkif_create(blkif_create_t *create) -{ - domid_t domid = create->domid; - unsigned int handle = create->blkif_handle; - unsigned int evtchn = create->evtchn; - unsigned long shmem_frame = create->shmem_frame; - blkif_t **pblkif, *blkif; - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif == NULL ) - { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - goto found_match; - pblkif = &(*pblkif)->hash_next; - } - - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->evtchn = evtchn; - blkif->irq = bind_evtchn_to_irq(evtchn); - blkif->shmem_frame = shmem_frame; - blkif->shmem_vbase = ioremap(shmem_frame<vbd_lock); - spin_lock_init(&blkif->blk_ring_lock); - - request_irq(irq, vblkif_be_int, 0, "vblkif-backend", blkif); - - blkif->hash_next = *pblkif; - *pblkif = blkif; - - create->status = BLKIF_STATUS_OKAY; - return; - - found_match: - create->status = BLKIF_STATUS_INTERFACE_EXISTS; - return; - - evtchn_in_use: - unbind_evtchn_from_irq(evtchn); /* drop refcnt */ - create->status = BLKIF_STATUS_ERROR; - return; -} - -void blkif_destroy(blkif_destroy_t *destroy) -{ - domid_t domid = destroy->domid; - unsigned int handle = destroy->blkif_handle; - blkif_t **pblkif, *blkif; - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif = *pblkif) == NULL ) 
- { - if ( (blkif->domid == domid) && (blkif->handle == handle) ) - goto found_match; - pblkif = &blkif->hash_next; - } - - destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; - return; - - found_match: - free_irq(blkif->irq, NULL); - unbind_evtchn_from_irq(blkif->evtchn); - *pblkif = blkif->hash_next; - kmem_cache_free(blkif_cachep, blkif); - destroy->status = BLKIF_STATUS_OKAY; -} - diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c deleted file mode 100644 index cb44ac173b..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c +++ /dev/null @@ -1,508 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/vblkif/backend/main.c - * - * Back-end of the driver for virtual block devices. This portion of the - * driver exports a 'unified' block-device interface that can be accessed - * by any operating system that implements a compatible front end. A - * reference front-end implementation can be found in: - * arch/xen/drivers/vblkif/frontend - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - */ - -#include "common.h" - -/* - * These are rather arbitrary. They are fairly large because adjacent requests - * pulled from a communication ring are quite likely to end up being part of - * the same scatter/gather request at the disc. - * - * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW ** - * This will increase the chances of being able to write whole tracks. - * 64 should be enough to keep us competitive with Linux. - */ -#define MAX_PENDING_REQS 64 -#define BATCH_PER_DOMAIN 16 - -/* - * Each outstanding request that we've passed to the lower device layers has a - * 'pending_req' allocated to it. Each buffer_head that completes decrements - * the pendcnt towards zero. When it hits zero, the specified domain has a - * response queued for it, with the saved 'id' passed back. 
- * - * We can't allocate pending_req's in order, since they may complete out of - * order. We therefore maintain an allocation ring. This ring also indicates - * when enough work has been passed down -- at that point the allocation ring - * will be empty. - */ -static pending_req_t pending_reqs[MAX_PENDING_REQS]; -static unsigned char pending_ring[MAX_PENDING_REQS]; -static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED; -/* NB. We use a different index type to differentiate from shared blk rings. */ -typedef unsigned int PEND_RING_IDX; -#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) -static PEND_RING_IDX pending_prod, pending_cons; -#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) - -static kmem_cache_t *buffer_head_cachep; - -static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned; - -static int lock_buffer(blkif_t *blkif, - unsigned long buffer, - unsigned short size, - int writeable_buffer); -static void unlock_buffer(unsigned long buffer, - unsigned short size, - int writeable_buffer); - -static void io_schedule(unsigned long unused); -static int do_block_io_op(blkif_t *blkif, int max_to_do); -static void dispatch_rw_block_io(blkif_t *blkif, - blk_ring_req_entry_t *req); -static void make_response(blkif_t *blkif, unsigned long id, - unsigned short op, unsigned long st); - - -/****************************************************************** - * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE - */ - -static struct list_head io_schedule_list; -static spinlock_t io_schedule_list_lock; - -static int __on_blkdev_list(blkif_t *blkif) -{ - return blkif->blkdev_list.next != NULL; -} - -static void remove_from_blkdev_list(blkif_t *blkif) -{ - unsigned long flags; - if ( !__on_blkdev_list(blkif) ) return; - spin_lock_irqsave(&io_schedule_list_lock, flags); - if ( __on_blkdev_list(blkif) ) - { - list_del(&blkif->blkdev_list); - blkif->blkdev_list.next = NULL; - blkif_put(blkif); - } - spin_unlock_irqrestore(&io_schedule_list_lock, 
flags); -} - -static void add_to_blkdev_list_tail(blkif_t *blkif) -{ - unsigned long flags; - if ( __on_blkdev_list(blkif) ) return; - spin_lock_irqsave(&io_schedule_list_lock, flags); - if ( !__on_blkdev_list(blkif) ) - { - list_add_tail(&blkif->blkdev_list, &io_schedule_list); - blkif_get(blkif); - } - spin_unlock_irqrestore(&io_schedule_list_lock, flags); -} - - -/****************************************************************** - * SCHEDULER FUNCTIONS - */ - -static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0); - -static void io_schedule(unsigned long unused) -{ - blkif_t *blkif; - struct list_head *ent; - - /* Queue up a batch of requests. */ - while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && - !list_empty(&io_schedule_list) ) - { - ent = io_schedule_list.next; - blkif = list_entry(ent, blkif_t, blkdev_list); - blkif_get(blkif); - remove_from_blkdev_list(blkif); - if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) - add_to_blkdev_list_tail(blkif); - blkif_put(blkif); - } - - /* Push the batch through to disc. */ - run_task_queue(&tq_disk); -} - -static void maybe_trigger_io_schedule(void) -{ - /* - * Needed so that two processes, who together make the following predicate - * true, don't both read stale values and evaluate the predicate - * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... - */ - smp_mb(); - - if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && - !list_empty(&io_schedule_list) ) - tasklet_schedule(&io_schedule_tasklet); -} - - - -/****************************************************************** - * COMPLETION CALLBACK -- Called as bh->b_end_io() - */ - -static void end_block_io_op(struct buffer_head *bh, int uptodate) -{ - pending_req_t *pending_req = bh->b_private; - - /* An error fails the entire request. 
*/ - if ( !uptodate ) - { - DPRINTK("Buffer not up-to-date at end of operation\n"); - pending_req->status = 2; - } - - unlock_buffer(virt_to_phys(bh->b_data), - bh->b_size, - (pending_req->operation==READ)); - - if ( atomic_dec_and_test(&pending_req->pendcnt) ) - { - make_response(pending_req->blkif, pending_req->id, - pending_req->operation, pending_req->status); - blkif_put(pending_req->blkif); - spin_lock(&pend_prod_lock); - pending_ring[MASK_PEND_IDX(pending_prod)] = - pending_req - pending_reqs; - pending_prod++; - spin_unlock(&pend_prod_lock); - maybe_trigger_io_schedule(); - } -} - - - -/****************************************************************************** - * NOTIFICATION FROM GUEST OS. - */ - -void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs) -{ - blkif_t *blkif = dev_id; - add_to_blkdev_list_tail(blkif); - maybe_trigger_io_schedule(); -} - - - -/****************************************************************** - * DOWNWARD CALLS -- These interface with the block-device layer proper. - */ - -static int lock_buffer(blkif_t *blkif, - unsigned long buffer, - unsigned short size, - int writeable_buffer) -{ - unsigned long pfn; - - for ( pfn = buffer >> PAGE_SHIFT; - pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); - pfn++ ) - { - } - - return 1; - - fail: - while ( pfn-- > (buffer >> PAGE_SHIFT) ) - { - } - return 0; -} - -static void unlock_buffer(unsigned long buffer, - unsigned short size, - int writeable_buffer) -{ - unsigned long pfn; - - for ( pfn = buffer >> PAGE_SHIFT; - pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); - pfn++ ) - { - } -} - -static int do_block_io_op(blkif_t *blkif, int max_to_do) -{ - blk_ring_t *blk_ring = blkif->blk_ring_base; - blk_ring_req_entry_t *req; - BLK_RING_IDX i; - int more_to_do = 0; - - /* Take items off the comms ring, taking care not to overflow. 
*/ - for ( i = blkif->blk_req_cons; - (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != - BLK_RING_SIZE); - i++ ) - { - if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) - { - more_to_do = 1; - break; - } - - req = &blk_ring->ring[MASK_BLK_IDX(i)].req; - switch ( req->operation ) - { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - dispatch_rw_block_io(blkif, req); - break; - - default: - DPRINTK("error: unknown block io operation [%d]\n", - blk_ring->ring[i].req.operation); - make_response(blkif, blk_ring->ring[i].req.id, - blk_ring->ring[i].req.operation, 1); - break; - } - } - - blkif->blk_req_cons = i; - return more_to_do; -} - -static void dispatch_rw_block_io(blkif_t *blkif, - blk_ring_req_entry_t *req) -{ - extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); - struct buffer_head *bh; - int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ; - unsigned short nr_sects; - unsigned long buffer; - int i, tot_sects; - pending_req_t *pending_req; - - /* We map virtual scatter/gather segments to physical segments. */ - int new_segs, nr_psegs = 0; - phys_seg_t phys_seg[MAX_BLK_SEGS * 2]; - - /* Check that number of segments is sane. */ - if ( unlikely(req->nr_segments == 0) || - unlikely(req->nr_segments > MAX_BLK_SEGS) ) - { - DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); - goto bad_descriptor; - } - - /* - * Check each address/size pair is sane, and convert into a - * physical device and block offset. Note that if the offset and size - * crosses a virtual extent boundary, we may end up with more - * physical scatter/gather segments than virtual segments. 
- */ - for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects ) - { - buffer = req->buffer_and_sects[i] & ~0x1FF; - nr_sects = req->buffer_and_sects[i] & 0x1FF; - - if ( unlikely(nr_sects == 0) ) - { - DPRINTK("zero-sized data request\n"); - goto bad_descriptor; - } - - phys_seg[nr_psegs].dev = req->device; - phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects; - phys_seg[nr_psegs].buffer = buffer; - phys_seg[nr_psegs].nr_sects = nr_sects; - - /* Translate the request into the relevant 'physical device' */ - new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation); - if ( new_segs < 0 ) - { - DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", - operation == READ ? "read" : "write", - req->sector_number + tot_sects, - req->sector_number + tot_sects + nr_sects, - req->device); - goto bad_descriptor; - } - - nr_psegs += new_segs; - ASSERT(nr_psegs <= MAX_BLK_SEGS*2); - } - - for ( i = 0; i < nr_psegs; i++ ) - { - if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, - phys_seg[i].nr_sects << 9, - operation==READ)) ) - { - DPRINTK("invalid buffer\n"); - while ( i-- > 0 ) - unlock_buffer(phys_seg[i].buffer, - phys_seg[i].nr_sects << 9, - operation==READ); - goto bad_descriptor; - } - } - - pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]]; - pending_req->blkif = blkif; - pending_req->id = req->id; - pending_req->operation = operation; - pending_req->status = 0; - atomic_set(&pending_req->pendcnt, nr_psegs); - - blkif_get(blkif); - - /* Now we pass each segment down to the real blkdev layer. 
*/ - for ( i = 0; i < nr_psegs; i++ ) - { - bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); - if ( unlikely(bh == NULL) ) - panic("bh is null\n"); - memset(bh, 0, sizeof (struct buffer_head)); - - bh->b_size = phys_seg[i].nr_sects << 9; - bh->b_dev = phys_seg[i].dev; - bh->b_rsector = (unsigned long)phys_seg[i].sector_number; - - /* SMH: we store a 'pseudo-virtual' bogus address in b_data since - later code will undo this transformation (i.e. +-PAGE_OFFSET). */ - bh->b_data = phys_to_virt(phys_seg[i].buffer); - - /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */ - bh->b_page = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; - bh->b_end_io = end_block_io_op; - bh->b_private = pending_req; - - bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock); - if ( operation == WRITE ) - bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate); - - atomic_set(&bh->b_count, 1); - - /* Dispatch a single request. We'll flush it to disc later. */ - submit_bh(operation, bh); - } - - return; - - bad_descriptor: - make_response(blkif, req->id, req->operation, 1); -} - - - -/****************************************************************** - * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING - */ - - -static void make_response(blkif_t *blkif, unsigned long id, - unsigned short op, unsigned long st) -{ - blk_ring_resp_entry_t *resp; - - /* Place on the response ring for the relevant domain. */ - spin_lock(&blkif->blk_ring_lock); - resp = &blkif->blk_ring_base-> - ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp; - resp->id = id; - resp->operation = op; - resp->status = st; - wmb(); - blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod; - spin_unlock(&blkif->blk_ring_lock); - - /* Kick the relevant domain. 
*/ - notify_via_evtchn(blkif->evtchn); -} - -static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs) -{ -#if 0 - unsigned long flags; - struct task_struct *p; - blk_ring_t *blk_ring; - int i; - - printk("Dumping block queue stats: nr_pending = %d" - " (prod=0x%08x,cons=0x%08x)\n", - NR_PENDING_REQS, pending_prod, pending_cons); - - read_lock_irqsave(&tasklist_lock, flags); - for_each_domain ( p ) - { - printk("Domain: %llu\n", blkif->domain); - blk_ring = blkif->blk_ring_base; - printk(" req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/" - "0x%08x on_list=%d\n", - blk_ring->req_prod, blkif->blk_req_cons, - blk_ring->resp_prod, blkif->blk_resp_prod, - __on_blkdev_list(p)); - } - read_unlock_irqrestore(&tasklist_lock, flags); - - for ( i = 0; i < MAX_PENDING_REQS; i++ ) - { - printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n", - i, pending_reqs[i].domain, pending_reqs[i].id, - atomic_read(&pending_reqs[i].pendcnt), - pending_reqs[i].operation, pending_reqs[i].status); - } -#endif -} - -void unlink_blkdev_info(blkif_t *blkif) -{ - unsigned long flags; - - spin_lock_irqsave(&io_schedule_list_lock, flags); - if ( __on_blkdev_list(blkif) ) - { - list_del(&blkif->blkdev_list); - blkif->blkdev_list.next = (void *)0xdeadbeef; - blkif_put(blkif); - } - spin_unlock_irqrestore(&io_schedule_list_lock, flags); -} - -static int __init init_module(void) -{ - int i; - - pending_cons = 0; - pending_prod = MAX_PENDING_REQS; - memset(pending_reqs, 0, sizeof(pending_reqs)); - for ( i = 0; i < MAX_PENDING_REQS; i++ ) - pending_ring[i] = i; - - for ( i = 0; i < NR_CPUS; i++ ) - completed_bhs[i] = NULL; - - spin_lock_init(&io_schedule_list_lock); - INIT_LIST_HEAD(&io_schedule_list); - - if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, - SA_SHIRQ, "vblkif-backend-dbg", &blkif_debug_int) != 0 ) - printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); - - buffer_head_cachep = kmem_cache_create( - "buffer_head_cache", sizeof(struct 
buffer_head), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - - return 0; -} - -static void cleanup_module(void) -{ -} - -module_init(init_module); -module_exit(cleanup_module); diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c deleted file mode 100644 index 2545c00d46..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c +++ /dev/null @@ -1,578 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/vblkif/backend/vbd.c - * - * Routines for managing virtual block devices (VBDs). - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - */ - -#include "common.h" - -void vbd_create(blkif_vbd_create_t *create) -{ - vbd_t *vbd; - rb_node_t **rb_p, *rb_parent = NULL; - blkif_t *blkif; - blkif_vdev_t vdevice = create->vdevice; - - blkif = blkif_find_by_handle(create->domid, create->blkif_handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", - create->domid, create->blkif_handle); - create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; - return; - } - - spin_lock(&blkif->vbd_lock); - - rb_p = &blkif->vbd_rb.rb_node; - while ( *rb_p != NULL ) - { - rb_parent = *rb_p; - vbd = rb_entry(rb_parent, vbd_t, rb); - if ( vdevice < vbd->vdevice ) - { - rb_p = &rb_parent->rb_left; - } - else if ( vdevice > vbd->vdevice ) - { - rb_p = &rb_parent->rb_right; - } - else - { - DPRINTK("vbd_create attempted for already existing vbd\n"); - create->status = BLKIF_STATUS_VBD_EXISTS; - goto out; - } - } - - if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) - { - DPRINTK("vbd_create: out of memory\n"); - create->status = BLKIF_STATUS_OUT_OF_MEMORY; - goto out; - } - - vbd->vdevice = vdevice; - vbd->mode = create->mode; - vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; - vbd->extents = NULL; - - rb_link_node(&vbd->rb, rb_parent, rb_p); - 
rb_insert_color(&vbd->rb, &blkif->vbd_rb); - - create->status = BLKIF_STATUS_OKAY; - - out: - spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); -} - - -/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */ -void vbd_grow(blkif_vbd_grow_t *grow) -{ - blkif_t *blkif; - xen_extent_le_t **px, *x; - vbd_t *vbd = NULL; - rb_node_t *rb; - blkif_vdev_t vdevice = grow->vdevice; - - blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", - grow->domid, grow->blkif_handle); - grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; - return; - } - - spin_lock(&blkif->vbd_lock); - - rb = blkif->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, vbd_t, rb); - if ( vdevice < vbd->vdevice ) - rb = rb->rb_left; - else if ( vdevice > vbd->vdevice ) - rb = rb->rb_right; - else - break; - } - - if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) - { - DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n"); - grow->status = BLKIF_STATUS_VBD_NOT_FOUND; - goto out; - } - - if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) ) - { - DPRINTK("vbd_grow: out of memory\n"); - grow->status = BLKIF_STATUS_OUT_OF_MEMORY; - goto out; - } - - x->extent.device = grow->extent.device; - x->extent.sector_start = grow->extent.sector_start; - x->extent.sector_length = grow->extent.sector_length; - x->next = (xen_extent_le_t *)NULL; - - for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) - continue; - - *px = x; - - grow->status = BLKIF_STATUS_OKAY; - - out: - spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); -} - - -void vbd_shrink(blkif_vbd_shrink_t *shrink) -{ - blkif_t *blkif; - xen_extent_le_t **px, *x; - vbd_t *vbd = NULL; - rb_node_t *rb; - blkif_vdev_t vdevice = shrink->vdevice; - - blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle); - if ( unlikely(blkif == NULL) ) - { - 
DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", - shrink->domid, shrink->blkif_handle); - shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; - return; - } - - spin_lock(&blkif->vbd_lock); - - rb = blkif->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, vbd_t, rb); - if ( vdevice < vbd->vdevice ) - rb = rb->rb_left; - else if ( vdevice > vbd->vdevice ) - rb = rb->rb_right; - else - break; - } - - if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) - { - shrink->status = BLKIF_STATUS_VBD_NOT_FOUND; - goto out; - } - - if ( unlikely(vbd->extents == NULL) ) - { - shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND; - goto out; - } - - /* Find the last extent. We now know that there is at least one. */ - for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next ) - continue; - - x = *px; - *px = x->next; - kfree(x); - - shrink->status = BLKIF_STATUS_OKAY; - - out: - spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); -} - - -void vbd_destroy(blkif_vbd_destroy_t *destroy) -{ - blkif_t *blkif; - vbd_t *vbd; - rb_node_t *rb; - xen_extent_le_t *x, *t; - blkif_vdev_t vdevice = destroy->vdevice; - - blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", - destroy->domid, destroy->blkif_handle); - destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; - return; - } - - spin_lock(&blkif->vbd_lock); - - rb = blkif->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, vbd_t, rb); - if ( vdevice < vbd->vdevice ) - rb = rb->rb_left; - else if ( vdevice > vbd->vdevice ) - rb = rb->rb_right; - else - goto found; - } - - destroy->status = BLKIF_STATUS_VBD_NOT_FOUND; - goto out; - - found: - rb_erase(rb, &blkif->vbd_rb); - x = vbd->extents; - kfree(vbd); - - while ( x != NULL ) - { - t = x->next; - kfree(x); - x = t; - } - - out: - spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); -} - - -void 
destroy_all_vbds(blkif_t *blkif) -{ - vbd_t *vbd; - rb_node_t *rb; - xen_extent_le_t *x, *t; - - spin_lock(&blkif->vbd_lock); - - while ( (rb = blkif->vbd_rb.rb_node) != NULL ) - { - vbd = rb_entry(rb, vbd_t, rb); - - rb_erase(rb, &blkif->vbd_rb); - x = vbd->extents; - kfree(vbd); - - while ( x != NULL ) - { - t = x->next; - kfree(x); - x = t; - } - } - - spin_unlock(&blkif->vbd_lock); -} - - -static int vbd_probe_single(xen_disk_info_t *xdi, - vbd_t *vbd, - struct task_struct *p) -{ - xen_extent_le_t *x; - xen_disk_t cur_disk; - - if ( xdi->count == xdi->max ) - { - DPRINTK("vbd_probe_devices: out of space for probe.\n"); - return -ENOMEM; - } - - cur_disk.device = vbd->vdevice; - cur_disk.info = vbd->type; - if ( !VBD_CAN_WRITE(vbd) ) - cur_disk.info |= XD_FLAG_RO; - cur_disk.capacity = 0ULL; - for ( x = vbd->extents; x != NULL; x = x->next ) - cur_disk.capacity += x->extent.nr_sectors; - cur_disk.domain = p->domain; - - /* Now copy into relevant part of user-space buffer */ - if( copy_to_user(&xdi->disks[xdi->count], - &cur_disk, - sizeof(xen_disk_t)) ) - { - DPRINTK("vbd_probe_devices: copy_to_user failed\n"); - return -EFAULT; - } - - xdi->count++; - - return 0; -} - - -static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) -{ - int rc = 0; - rb_node_t *rb; - - spin_lock(&p->vbd_lock); - - if ( (rb = p->vbd_rb.rb_node) == NULL ) - goto out; - - new_subtree: - /* STEP 1. Find least node (it'll be left-most). */ - while ( rb->rb_left != NULL ) - rb = rb->rb_left; - - for ( ; ; ) - { - /* STEP 2. Dealt with left subtree. Now process current node. */ - if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 ) - goto out; - - /* STEP 3. Process right subtree, if any. */ - if ( rb->rb_right != NULL ) - { - rb = rb->rb_right; - goto new_subtree; - } - - /* STEP 4. Done both subtrees. Head back through ancesstors. */ - for ( ; ; ) - { - /* We're done when we get back to the root node. 
*/ - if ( rb->rb_parent == NULL ) - goto out; - /* If we are left of parent, then parent is next to process. */ - if ( rb->rb_parent->rb_left == rb ) - break; - /* If we are right of parent, then we climb to grandparent. */ - rb = rb->rb_parent; - } - - rb = rb->rb_parent; - } - - out: - spin_unlock(&p->vbd_lock); - return rc; -} - - -/* - * Return information about the VBDs available for a given domain, or for all - * domains; in the general case the 'domain' argument will be 0 which means - * "information about the caller"; otherwise the 'domain' argument will - * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of - * these cases require the caller to be privileged. - */ -long vbd_probe(vbd_probe_t *probe) -{ - struct task_struct *p = NULL; - unsigned long flags; - long ret = 0; - - if ( probe->domain != 0 ) - { - /* We can only probe for ourselves (unless we're privileged). */ - if( (probe->domain != current->domain) && !IS_PRIV(current) ) - return -EPERM; - - if ( (probe->domain != VBD_PROBE_ALL) && - ((p = find_domain_by_id(probe->domain)) == NULL) ) - { - DPRINTK("vbd_probe attempted for non-existent domain %llu\n", - probe->domain); - return -EINVAL; - } - } - else - { - /* Default is to probe for ourselves. 
*/ - p = current; - get_task_struct(p); /* to mirror final put_task_struct */ - } - - if ( probe->domain == VBD_PROBE_ALL ) - { - read_lock_irqsave(&tasklist_lock, flags); - for_each_domain ( p ) - { - if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) - { - read_unlock_irqrestore(&tasklist_lock, flags); - goto out; - } - } - read_unlock_irqrestore(&tasklist_lock, flags); - } - else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) - goto out; - - out: - if ( ret != 0 ) - DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); - if ( p != NULL ) - put_task_struct(p); - return ret; -} - - -long vbd_info(vbd_info_t *info) -{ - struct task_struct *p; - xen_extent_le_t *x; - xen_extent_t *extents; - vbd_t *vbd = NULL; - rb_node_t *rb; - long ret = 0; - - if ( (info->domain != current->domain) && !IS_PRIV(current) ) - return -EPERM; - - if ( (p = find_domain_by_id(info->domain)) == NULL ) - { - DPRINTK("vbd_info attempted for non-existent domain %llu\n", - info->domain); - return -EINVAL; - } - - spin_lock(&p->vbd_lock); - - rb = p->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, vbd_t, rb); - if ( info->vdevice < vbd->vdevice ) - rb = rb->rb_left; - else if ( info->vdevice > vbd->vdevice ) - rb = rb->rb_right; - else - break; - } - - if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) ) - { - DPRINTK("vbd_info attempted on non-existent VBD.\n"); - ret = -EINVAL; - goto out; - } - - info->mode = vbd->mode; - info->nextents = 0; - - extents = info->extents; - for ( x = vbd->extents; x != NULL; x = x->next ) - { - if ( info->nextents == info->maxextents ) - break; - if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) ) - { - DPRINTK("vbd_info: copy_to_user failed\n"); - ret = -EFAULT; - goto out; - } - extents++; - info->nextents++; - } - - out: - spin_unlock(&p->vbd_lock); - put_task_struct(p); - return ret; -} - - -int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) -{ - xen_extent_le_t *x; 
- vbd_t *vbd; - rb_node_t *rb; - xen_sector_t sec_off; - unsigned long nr_secs; - - spin_lock(&p->vbd_lock); - - rb = p->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, vbd_t, rb); - if ( pseg->dev < vbd->vdevice ) - rb = rb->rb_left; - else if ( pseg->dev > vbd->vdevice ) - rb = rb->rb_right; - else - goto found; - } - - DPRINTK("vbd_translate; domain %llu attempted to access " - "non-existent VBD.\n", p->domain); - - spin_unlock(&p->vbd_lock); - return -ENODEV; - - found: - - if ( ((operation == READ) && !VBD_CAN_READ(vbd)) || - ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) ) - { - spin_unlock(&p->vbd_lock); - return -EACCES; - } - - /* - * Now iterate through the list of xen_extents, working out which should - * be used to perform the translation. - */ - sec_off = pseg->sector_number; - nr_secs = pseg->nr_sects; - for ( x = vbd->extents; x != NULL; x = x->next ) - { - if ( sec_off < x->extent.nr_sectors ) - { - pseg->dev = x->extent.device; - pseg->sector_number = x->extent.start_sector + sec_off; - if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) ) - goto overrun; - spin_unlock(&p->vbd_lock); - return 1; - } - sec_off -= x->extent.nr_sectors; - } - - DPRINTK("vbd_translate: end of vbd.\n"); - spin_unlock(&p->vbd_lock); - return -EACCES; - - /* - * Here we deal with overrun onto the following extent. We don't deal with - * overrun of more than one boundary since each request is restricted to - * 2^9 512-byte sectors, so it should be trivial for control software to - * ensure that extents are large enough to prevent excessive overrun. - */ - overrun: - - /* Adjust length of first chunk to run to end of first extent. */ - pseg[0].nr_sects = x->extent.nr_sectors - sec_off; - - /* Set second chunk buffer and length to start where first chunk ended. */ - pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9); - pseg[1].nr_sects = nr_secs - pseg[0].nr_sects; - - /* Now move to the next extent. Check it exists and is long enough! 
*/ - if ( unlikely((x = x->next) == NULL) || - unlikely(x->extent.nr_sectors < pseg[1].nr_sects) ) - { - DPRINTK("vbd_translate: multiple overruns or end of vbd.\n"); - spin_unlock(&p->vbd_lock); - return -EACCES; - } - - /* Store the real device and start sector for the second chunk. */ - pseg[1].dev = x->extent.device; - pseg[1].sector_number = x->extent.start_sector; - - spin_unlock(&p->vbd_lock); - return 2; -} diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile deleted file mode 100644 index 35986ca54a..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := block.o vbd.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c deleted file mode 100644 index d00dd98f7b..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c +++ /dev/null @@ -1,625 +0,0 @@ -/****************************************************************************** - * block.c - * - * Xenolinux virtual block-device driver. - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - */ - -#include "block.h" -#include -#include -#include -#include -#include - -#include - -typedef unsigned char byte; /* from linux/ide.h */ - -#define STATE_ACTIVE 0 -#define STATE_SUSPENDED 1 -#define STATE_CLOSED 2 -static unsigned int state = STATE_SUSPENDED; - -/* Dynamically-mapped IRQs. */ -static int xlblk_response_irq, xlblk_update_irq; - -static blk_ring_t *blk_ring; -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ -static BLK_RING_IDX req_prod; /* Private request producer. */ - -/* We plug the I/O ring if the driver is suspended or if the ring is full. 
*/ -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ - (state != STATE_ACTIVE)) - - -/* - * Request queues with outstanding work, but ring is currently full. - * We need no special lock here, as we always access this with the - * io_request_lock held. We only need a small maximum list. - */ -#define MAX_PENDING 8 -static request_queue_t *pending_queues[MAX_PENDING]; -static int nr_pending; - -static kdev_t sg_dev; -static int sg_operation = -1; -static unsigned long sg_next_sect; -#define DISABLE_SCATTERGATHER() (sg_operation = -1) - -static inline void signal_requests_to_xen(void) -{ - block_io_op_t op; - - DISABLE_SCATTERGATHER(); - blk_ring->req_prod = req_prod; - - op.cmd = BLOCK_IO_OP_SIGNAL; - HYPERVISOR_block_io_op(&op); - return; -} - - -/* - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen - * - * Schedule a task for keventd to run, which will update the VBDs and perform - * the corresponding updates to our view of VBD state, so the XenoLinux will - * respond to changes / additions / deletions to the set of VBDs automatically. - */ -static struct tq_struct update_tq; -static void update_vbds_task(void *unused) -{ - xlvbd_update_vbds(); -} -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - update_tq.routine = update_vbds_task; - schedule_task(&update_tq); -} - - -int xen_block_open(struct inode *inode, struct file *filep) -{ - short xldev = inode->i_rdev; - struct gendisk *gd = get_gendisk(xldev); - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - short minor = MINOR(xldev); - - if ( gd->part[minor].nr_sects == 0 ) - { - /* - * Device either doesn't exist, or has zero capacity; we use a few - * cheesy heuristics to return the relevant error code - */ - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || - ((minor & (gd->max_p - 1)) != 0) ) - { - /* - * We have a real device, but no such partition, or we just have a - * partition number so guess this is the problem. 
- */ - return -ENXIO; /* no such device or address */ - } - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) - { - /* This is a removable device => assume that media is missing. */ - return -ENOMEDIUM; /* media not present (this is a guess) */ - } - else - { - /* Just go for the general 'no such device' error. */ - return -ENODEV; /* no such device */ - } - } - - /* Update of usage count is protected by per-device semaphore. */ - disk->usage++; - - return 0; -} - - -int xen_block_release(struct inode *inode, struct file *filep) -{ - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - - /* - * When usage drops to zero it may allow more VBD updates to occur. - * Update of usage count is protected by a per-device semaphore. - */ - if ( --disk->usage == 0 ) - { - update_tq.routine = update_vbds_task; - schedule_task(&update_tq); - } - - return 0; -} - - -int xen_block_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument) -{ - kdev_t dev = inode->i_rdev; - struct hd_geometry *geo = (struct hd_geometry *)argument; - struct gendisk *gd; - struct hd_struct *part; - int i; - - /* NB. No need to check permissions. That is done for us. 
*/ - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long) argument, dev); - - gd = get_gendisk(dev); - part = &gd->part[MINOR(dev)]; - - switch ( command ) - { - case BLKGETSIZE: - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); - return put_user(part->nr_sects, (unsigned long *) argument); - - case BLKGETSIZE64: - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, - (u64)part->nr_sects * 512); - return put_user((u64)part->nr_sects * 512, (u64 *) argument); - - case BLKRRPART: /* re-read partition table */ - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); - return xen_block_revalidate(dev); - - case BLKSSZGET: - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - - case BLKBSZGET: /* get block size */ - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); - break; - - case BLKBSZSET: /* set block size */ - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); - break; - - case BLKRASET: /* set read-ahead */ - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); - break; - - case BLKRAGET: /* get read-ahead */ - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); - break; - - case HDIO_GETGEO: - /* note: these values are complete garbage */ - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); - if (!argument) return -EINVAL; - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; - return 0; - - case HDIO_GETGEO_BIG: - /* note: these values are complete garbage */ - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); - if (!argument) return -EINVAL; - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; - return 0; - - case 
CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - case SCSI_IOCTL_GET_BUS_NUMBER: - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev"); - return -ENOSYS; - - default: - printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command); - return -ENOSYS; - } - - return 0; -} - -/* check media change: should probably do something here in some cases :-) */ -int xen_block_check(kdev_t dev) -{ - DPRINTK("xen_block_check\n"); - return 0; -} - -int xen_block_revalidate(kdev_t dev) -{ - struct block_device *bd; - struct gendisk *gd; - xl_disk_t *disk; - unsigned long capacity; - int i, rc = 0; - - if ( (bd = bdget(dev)) == NULL ) - return -EINVAL; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((gd = get_gendisk(dev)) == NULL) || - ((disk = xldev_to_xldisk(dev)) == NULL) || - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) - { - rc = -EINVAL; - goto out; - } - - if ( disk->usage > 1 ) - { - rc = -EBUSY; - goto out; - } - - /* Only reread partition table if VBDs aren't mapped to partitions. */ - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) - { - for ( i = gd->max_p - 1; i >= 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - - -/* - * hypervisor_request - * - * request block io - * - * id: for guest use only. - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os. 
- */ -static int hypervisor_request(unsigned long id, - int operation, - char * buffer, - unsigned long sector_number, - unsigned short nr_sectors, - kdev_t device) -{ - unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); - struct gendisk *gd; - blk_ring_req_entry_t *req; - struct buffer_head *bh; - - if ( unlikely(nr_sectors >= (1<<9)) ) - BUG(); - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) - BUG(); - - if ( unlikely(state == STATE_CLOSED) ) - return 1; - - switch ( operation ) - { - - case XEN_BLOCK_READ: - case XEN_BLOCK_WRITE: - gd = get_gendisk(device); - - /* - * Update the sector_number we'll pass down as appropriate; note that - * we could sanity check that resulting sector will be in this - * partition, but this will happen in xen anyhow. - */ - sector_number += gd->part[MINOR(device)].start_sect; - - /* - * If this unit doesn't consist of virtual (i.e., Xen-specified) - * partitions then we clear the partn bits from the device number. - */ - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & - GENHD_FL_VIRT_PARTNS) ) - device &= ~(gd->max_p - 1); - - if ( (sg_operation == operation) && - (sg_dev == device) && - (sg_next_sect == sector_number) ) - { - req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; - bh = (struct buffer_head *)id; - bh->b_reqnext = (struct buffer_head *)req->id; - req->id = id; - req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; - if ( ++req->nr_segments < MAX_BLK_SEGS ) - sg_next_sect += nr_sectors; - else - DISABLE_SCATTERGATHER(); - return 0; - } - else if ( RING_PLUGGED ) - { - return 1; - } - else - { - sg_operation = operation; - sg_dev = device; - sg_next_sect = sector_number + nr_sectors; - } - break; - - default: - panic("unknown op %d\n", operation); - } - - /* Fill out a communications ring structure. 
*/ - req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; - req->id = id; - req->operation = operation; - req->sector_number = (xen_sector_t)sector_number; - req->device = device; - req->nr_segments = 1; - req->buffer_and_sects[0] = buffer_ma | nr_sectors; - req_prod++; - - return 0; -} - - -/* - * do_xlblk_request - * read a block; request is in a request queue - */ -void do_xlblk_request(request_queue_t *rq) -{ - struct request *req; - struct buffer_head *bh, *next_bh; - int rw, nsect, full, queued = 0; - - DPRINTK("xlblk.c::do_xlblk_request\n"); - - while ( !rq->plugged && !list_empty(&rq->queue_head)) - { - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) - goto out; - - DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", - req, req->cmd, req->sector, - req->current_nr_sectors, req->nr_sectors, req->bh); - - rw = req->cmd; - if ( rw == READA ) - rw = READ; - if ( unlikely((rw != READ) && (rw != WRITE)) ) - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); - - req->errors = 0; - - bh = req->bh; - while ( bh != NULL ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - - full = hypervisor_request( - (unsigned long)bh, - (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); - - if ( full ) - { - bh->b_reqnext = next_bh; - pending_queues[nr_pending++] = rq; - if ( unlikely(nr_pending >= MAX_PENDING) ) - BUG(); - goto out; - } - - queued++; - - /* Dequeue the buffer head from the request. */ - nsect = bh->b_size >> 9; - bh = req->bh = next_bh; - - if ( bh != NULL ) - { - /* There's another buffer head to do. Update the request. */ - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; - req->buffer = bh->b_data; - } - else - { - /* That was the last buffer head. Finalise the request. 
*/ - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) - BUG(); - blkdev_dequeue_request(req); - end_that_request_last(req); - } - } - } - - out: - if ( queued != 0 ) signal_requests_to_xen(); -} - - -static void kick_pending_request_queues(void) -{ - /* We kick pending request queues if the ring is reasonably empty. */ - if ( (nr_pending != 0) && - ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) - { - /* Attempt to drain the queue, but bail if the ring becomes full. */ - while ( (nr_pending != 0) && !RING_PLUGGED ) - do_xlblk_request(pending_queues[--nr_pending]); - } -} - - -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - BLK_RING_IDX i; - unsigned long flags; - struct buffer_head *bh, *next_bh; - - if ( unlikely(state == STATE_CLOSED) ) - return; - - spin_lock_irqsave(&io_request_lock, flags); - - for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) - { - blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; - switch ( bret->operation ) - { - case XEN_BLOCK_READ: - case XEN_BLOCK_WRITE: - if ( unlikely(bret->status != 0) ) - DPRINTK("Bad return from blkdev data request: %lx\n", - bret->status); - for ( bh = (struct buffer_head *)bret->id; - bh != NULL; - bh = next_bh ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, !bret->status); - } - break; - - default: - BUG(); - } - } - - resp_cons = i; - - kick_pending_request_queues(); - - spin_unlock_irqrestore(&io_request_lock, flags); -} - - -static void reset_xlblk_interface(void) -{ - block_io_op_t op; - - nr_pending = 0; - - op.cmd = BLOCK_IO_OP_RESET; - if ( HYPERVISOR_block_io_op(&op) != 0 ) - printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n"); - - op.cmd = BLOCK_IO_OP_RING_ADDRESS; - (void)HYPERVISOR_block_io_op(&op); - - set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT); - blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); - blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; - - 
wmb(); - state = STATE_ACTIVE; -} - - -int __init xlblk_init(void) -{ - int error; - - reset_xlblk_interface(); - - xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV); - xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD); - - error = request_irq(xlblk_response_irq, xlblk_response_int, - SA_SAMPLE_RANDOM, "blkdev", NULL); - if ( error ) - { - printk(KERN_ALERT "Could not allocate receive interrupt\n"); - goto fail; - } - - error = request_irq(xlblk_update_irq, xlblk_update_int, - 0, "blkdev", NULL); - - if ( error ) - { - printk(KERN_ALERT "Could not allocate block update interrupt\n"); - goto fail; - } - - (void)xlvbd_init(); - - return 0; - - fail: - return error; -} - - -static void __exit xlblk_cleanup(void) -{ - xlvbd_cleanup(); - free_irq(xlblk_response_irq, NULL); - free_irq(xlblk_update_irq, NULL); - unbind_virq_from_irq(VIRQ_BLKDEV); - unbind_virq_from_irq(VIRQ_VBD_UPD); -} - - -#ifdef MODULE -module_init(xlblk_init); -module_exit(xlblk_cleanup); -#endif - - -void blkdev_suspend(void) -{ - state = STATE_SUSPENDED; - wmb(); - - while ( resp_cons != blk_ring->req_prod ) - { - barrier(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } - - wmb(); - state = STATE_CLOSED; - wmb(); - - clear_fixmap(FIX_BLKRING_BASE); -} - - -void blkdev_resume(void) -{ - reset_xlblk_interface(); - spin_lock_irq(&io_request_lock); - kick_pending_request_queues(); - spin_unlock_irq(&io_request_lock); -} diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h deleted file mode 100644 index e41e03970e..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h +++ /dev/null @@ -1,82 +0,0 @@ -/****************************************************************************** - * block.h - * - * Shared definitions between all levels of XenoLinux Virtual block devices. 
- */ - -#ifndef __XEN_DRIVERS_BLOCK_H__ -#define __XEN_DRIVERS_BLOCK_H__ - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#if 0 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK(_f, _a...) ((void)0) -#endif - -#if 0 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) -#else -#define DPRINTK_IOCTL(_f, _a...) ((void)0) -#endif - -/* Private gendisk->flags[] values. */ -#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ - -/* - * We have one of these per vbd, whether ide, scsi or 'other'. - * They hang in an array off the gendisk structure. We may end up putting - * all kinds of interesting stuff here :-) - */ -typedef struct xl_disk { - int usage; -} xl_disk_t; - -extern int xen_control_msg(int operration, char *buffer, int size); -extern int xen_block_open(struct inode *inode, struct file *filep); -extern int xen_block_release(struct inode *inode, struct file *filep); -extern int xen_block_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument); -extern int xen_block_check(kdev_t dev); -extern int xen_block_revalidate(kdev_t dev); -extern void do_xlblk_request (request_queue_t *rq); - -extern void xlvbd_update_vbds(void); - -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) -{ - struct gendisk *gd = get_gendisk(xldev); - - if ( gd == NULL ) - return NULL; - - return (xl_disk_t *)gd->real_devices + - (MINOR(xldev) >> gd->minor_shift); -} - - -/* Virtual block-device subsystem. 
*/ -extern int xlvbd_init(void); -extern void xlvbd_cleanup(void); - -#endif /* __XEN_DRIVERS_BLOCK_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c deleted file mode 100644 index e08b976c56..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c +++ /dev/null @@ -1,561 +0,0 @@ -/****************************************************************************** - * vbd.c - * - * Xenolinux virtual block-device driver (xvd). - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - */ - -#include "block.h" -#include - -/* - * For convenience we distinguish between ide, scsi and 'other' (i.e. - * potentially combinations of the two) in the naming scheme and in a few - * other places (like default readahead, etc). - */ -#define XLIDE_MAJOR_NAME "hd" -#define XLSCSI_MAJOR_NAME "sd" -#define XLVBD_MAJOR_NAME "xvd" - -#define XLIDE_DEVS_PER_MAJOR 2 -#define XLSCSI_DEVS_PER_MAJOR 16 -#define XLVBD_DEVS_PER_MAJOR 16 - -#define XLIDE_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ -#define XLIDE_MAX_PART (1 << XLIDE_PARTN_SHIFT) /* minors per ide vbd */ - -#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ - -#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ -#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ - -/* The below are for the generic drivers/block/ll_rw_block.c code. 
*/ -static int xlide_blksize_size[256]; -static int xlide_hardsect_size[256]; -static int xlide_max_sectors[256]; -static int xlscsi_blksize_size[256]; -static int xlscsi_hardsect_size[256]; -static int xlscsi_max_sectors[256]; -static int xlvbd_blksize_size[256]; -static int xlvbd_hardsect_size[256]; -static int xlvbd_max_sectors[256]; - -/* Information from Xen about our VBDs. */ -#define MAX_VBDS 64 -static int nr_vbds; -static xen_disk_t *vbd_info; - -static struct block_device_operations xlvbd_block_fops = -{ - open: xen_block_open, - release: xen_block_release, - ioctl: xen_block_ioctl, - check_media_change: xen_block_check, - revalidate: xen_block_revalidate, -}; - -static int xlvbd_get_vbd_info(xen_disk_t *disk_info) -{ - int error; - block_io_op_t op; - - /* Probe for disk information. */ - memset(&op, 0, sizeof(op)); - op.cmd = BLOCK_IO_OP_VBD_PROBE; - op.u.probe_params.domain = 0; - op.u.probe_params.xdi.max = MAX_VBDS; - op.u.probe_params.xdi.disks = disk_info; - op.u.probe_params.xdi.count = 0; - - if ( (error = HYPERVISOR_block_io_op(&op)) != 0 ) - { - printk(KERN_ALERT "Could not probe disks (%d)\n", error); - return -1; - } - - return op.u.probe_params.xdi.count; -} - -/* - * xlvbd_init_device - initialise a VBD device - * @disk: a xen_disk_t describing the VBD - * - * Takes a xen_disk_t * that describes a VBD the domain has access to. - * Performs appropriate initialisation and registration of the device. - * - * Care needs to be taken when making re-entrant calls to ensure that - * corruption does not occur. Also, devices that are in use should not have - * their details updated. This is the caller's responsibility. - */ -static int xlvbd_init_device(xen_disk_t *xd) -{ - int device = xd->device; - int major = MAJOR(device); - int minor = MINOR(device); - int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ - int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? 
*/ - char *major_name; - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk; - int i, rc = 0, max_part, partno; - unsigned long capacity; - - unsigned char buf[64]; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) ) - { - printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( is_ide ) { - - major_name = XLIDE_MAJOR_NAME; - max_part = XLIDE_MAX_PART; - - } else if ( is_scsi ) { - - major_name = XLSCSI_MAJOR_NAME; - max_part = XLSCSI_MAX_PART; - - } else if (XD_VIRTUAL(xd->info)) { - - major_name = XLVBD_MAJOR_NAME; - max_part = XLVBD_MAX_PART; - - } else { - - /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */ - printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", - major, minor); - is_scsi = 1; - major_name = "cciss"; - max_part = XLSCSI_MAX_PART; - - } - - partno = minor & (max_part - 1); - - if ( (gd = get_gendisk(device)) == NULL ) - { - rc = register_blkdev(major, major_name, &xlvbd_block_fops); - if ( rc < 0 ) - { - printk(KERN_ALERT "XL VBD: can't get major %d\n", major); - goto out; - } - - if ( is_ide ) - { - blksize_size[major] = xlide_blksize_size; - hardsect_size[major] = xlide_hardsect_size; - max_sectors[major] = xlide_max_sectors; - read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */ - } - else if ( is_scsi ) - { - blksize_size[major] = xlscsi_blksize_size; - hardsect_size[major] = xlscsi_hardsect_size; - max_sectors[major] = xlscsi_max_sectors; - read_ahead[major] = 0; /* XXX 8; -- guessing */ - } - else - { - blksize_size[major] = xlvbd_blksize_size; - hardsect_size[major] = xlvbd_hardsect_size; - max_sectors[major] = xlvbd_max_sectors; - read_ahead[major] = 8; - } - - blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); - - /* - * 
Turn off barking 'headactive' mode. We dequeue buffer heads as - * soon as we pass them down to Xen. - */ - blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); - - /* Construct an appropriate gendisk structure. */ - gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); - gd->major = major; - gd->major_name = major_name; - - gd->max_p = max_part; - if ( is_ide ) - { - gd->minor_shift = XLIDE_PARTN_SHIFT; - gd->nr_real = XLIDE_DEVS_PER_MAJOR; - } - else if ( is_scsi ) - { - gd->minor_shift = XLSCSI_PARTN_SHIFT; - gd->nr_real = XLSCSI_DEVS_PER_MAJOR; - } - else - { - gd->minor_shift = XLVBD_PARTN_SHIFT; - gd->nr_real = XLVBD_DEVS_PER_MAJOR; - } - - /* - ** The sizes[] and part[] arrays hold the sizes and other - ** information about every partition with this 'major' (i.e. - ** every disk sharing the 8 bit prefix * max partns per disk) - */ - gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); - gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), - GFP_KERNEL); - memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); - memset(gd->part, 0, max_part * gd->nr_real - * sizeof(struct hd_struct)); - - - gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), - GFP_KERNEL); - memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); - - gd->next = NULL; - gd->fops = &xlvbd_block_fops; - - gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), - GFP_KERNEL); - gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); - - memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); - memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); - - add_gendisk(gd); - - blk_size[major] = gd->sizes; - } - - if ( XD_READONLY(xd->info) ) - set_device_ro(device, 1); - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN; - - /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. 
*/ - capacity = (unsigned long)xd->capacity; - - if ( partno != 0 ) - { - /* - * If this was previously set up as a real disc we will have set - * up partition-table information. Virtual partitions override - * 'real' partitions, and the two cannot coexist on a device. - */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(max_part-1)] != 0) ) - { - /* - * Any non-zero sub-partition entries must be cleaned out before - * installing 'virtual' partition entries. The two types cannot - * coexist, and virtual partitions are favoured. - */ - kdev_t dev = device & ~(max_part-1); - for ( i = max_part - 1; i > 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - printk(KERN_ALERT - "Virtual partitions found for /dev/%s - ignoring any " - "real partition information we may have found.\n", - disk_name(gd, MINOR(device), buf)); - } - - /* Need to skankily setup 'partition' information */ - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity; - - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - } - else - { - gd->part[minor].nr_sects = capacity; - gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9); - - /* Some final fix-ups depending on the device type */ - switch ( XD_TYPE(xd->info) ) - { - case XD_TYPE_CDROM: - case XD_TYPE_FLOPPY: - case XD_TYPE_TAPE: - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; - printk(KERN_ALERT - "Skipping partition check on %s /dev/%s\n", - XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : - (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : - "floppy"), disk_name(gd, MINOR(device), buf)); - break; - - case XD_TYPE_DISK: - /* Only check partitions on real discs (not virtual!). 
*/ - if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - { - printk(KERN_ALERT - "Skipping partition check on virtual /dev/%s\n", - disk_name(gd, MINOR(device), buf)); - break; - } - register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity); - break; - - default: - printk(KERN_ALERT "XenoLinux: unknown device type %d\n", - XD_TYPE(xd->info)); - break; - } - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - - -/* - * xlvbd_remove_device - remove a device node if possible - * @device: numeric device ID - * - * Updates the gendisk structure and invalidates devices. - * - * This is OK for now but in future, should perhaps consider where this should - * deallocate gendisks / unregister devices. - */ -static int xlvbd_remove_device(int device) -{ - int i, rc = 0, minor = MINOR(device); - struct gendisk *gd; - struct block_device *bd; - xl_disk_t *disk = NULL; - - if ( (bd = bdget(device)) == NULL ) - return -1; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((gd = get_gendisk(device)) == NULL) || - ((disk = xldev_to_xldisk(device)) == NULL) ) - BUG(); - - if ( disk->usage != 0 ) - { - printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device); - rc = -1; - goto out; - } - - if ( (minor & (gd->max_p-1)) != 0 ) - { - /* 1: The VBD is mapped to a partition rather than a whole unit. */ - invalidate_device(device, 1); - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = 0; - gd->sizes[minor] = 0; - - /* Clear the consists-of-virtual-partitions flag if possible. */ - gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; - for ( i = 1; i < gd->max_p; i++ ) - if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 ) - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - - /* - * If all virtual partitions are now gone, and a 'whole unit' VBD is - * present, then we can try to grok the unit's real partition table. 
- */ - if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) && - (gd->sizes[minor & ~(gd->max_p-1)] != 0) && - !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) ) - { - register_disk(gd, - device&~(gd->max_p-1), - gd->max_p, - &xlvbd_block_fops, - gd->part[minor&~(gd->max_p-1)].nr_sects); - } - } - else - { - /* - * 2: The VBD is mapped to an entire 'unit'. Clear all partitions. - * NB. The partition entries are only cleared if there are no VBDs - * mapped to individual partitions on this unit. - */ - i = gd->max_p - 1; /* Default: clear subpartitions as well. */ - if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */ - while ( i >= 0 ) - { - invalidate_device(device+i, 1); - gd->part[minor+i].start_sect = 0; - gd->part[minor+i].nr_sects = 0; - gd->sizes[minor+i] = 0; - i--; - } - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - -/* - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver - * state. The VBDs need to be updated in this way when the domain is - * initialised and also each time we receive an XLBLK_UPDATE event. - */ -void xlvbd_update_vbds(void) -{ - int i, j, k, old_nr, new_nr; - xen_disk_t *old_info, *new_info, *merged_info; - - old_info = vbd_info; - old_nr = nr_vbds; - - new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); - if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) - { - kfree(new_info); - return; - } - - /* - * Final list maximum size is old list + new list. This occurs only when - * old list and new list do not overlap at all, and we cannot yet destroy - * VBDs in the old list because the usage counts are busy. - */ - merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL); - - /* @i tracks old list; @j tracks new list; @k tracks merged list. 
*/ - i = j = k = 0; - - while ( (i < old_nr) && (j < new_nr) ) - { - if ( old_info[i].device < new_info[j].device ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); - i++; - } - else if ( old_info[i].device > new_info[j].device ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); - j++; - } - else - { - if ( ((old_info[i].capacity == new_info[j].capacity) && - (old_info[i].info == new_info[j].info)) || - (xlvbd_remove_device(old_info[i].device) != 0) ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); - else if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); - i++; j++; - } - } - - for ( ; i < old_nr; i++ ) - { - if ( xlvbd_remove_device(old_info[i].device) != 0 ) - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); - } - - for ( ; j < new_nr; j++ ) - { - if ( xlvbd_init_device(&new_info[j]) == 0 ) - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); - } - - vbd_info = merged_info; - nr_vbds = k; - - kfree(old_info); - kfree(new_info); -} - - -/* - * Set up all the linux device goop for the virtual block devices (vbd's) that - * xen tells us about. Note that although from xen's pov VBDs are addressed - * simply an opaque 16-bit device number, the domain creation tools - * conventionally allocate these numbers to correspond to those used by 'real' - * linux -- this is just for convenience as it means e.g. that the same - * /etc/fstab can be used when booting with or without xen. - */ -int __init xlvbd_init(void) -{ - int i; - - /* - * If compiled as a module, we don't support unloading yet. We therefore - * permanently increment the reference count to disallow it. - */ - SET_MODULE_OWNER(&xlvbd_block_fops); - MOD_INC_USE_COUNT; - - /* Initialize the global arrays. 
*/ - for ( i = 0; i < 256; i++ ) - { - /* from the generic ide code (drivers/ide/ide-probe.c, etc) */ - xlide_blksize_size[i] = 1024; - xlide_hardsect_size[i] = 512; - xlide_max_sectors[i] = 128; /* 'hwif->rqsize' if we knew it */ - - /* from the generic scsi disk code (drivers/scsi/sd.c) */ - xlscsi_blksize_size[i] = 1024; /* XXX 512; */ - xlscsi_hardsect_size[i] = 512; - xlscsi_max_sectors[i] = 128*8; /* XXX 128; */ - - /* we don't really know what to set these too since it depends */ - xlvbd_blksize_size[i] = 512; - xlvbd_hardsect_size[i] = 512; - xlvbd_max_sectors[i] = 128; - } - - vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); - nr_vbds = xlvbd_get_vbd_info(vbd_info); - - if ( nr_vbds < 0 ) - { - kfree(vbd_info); - vbd_info = NULL; - nr_vbds = 0; - } - else - { - for ( i = 0; i < nr_vbds; i++ ) - xlvbd_init_device(&vbd_info[i]); - } - - return 0; -} - - -#ifdef MODULE -module_init(xlvbd_init); -#endif diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h deleted file mode 100644 index 9fded2bb90..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h +++ /dev/null @@ -1,109 +0,0 @@ -/****************************************************************************** - * vblkif.h - * - * Unified block-device I/O interface for Xen guest OSes. - * - * Copyright (c) 2003-2004, Keir Fraser - */ - -#ifndef __SHARED_VBLKIF_H__ -#define __SHARED_VBLKIF_H__ - -#define blkif_vdev_t u16 -#define blkif_sector_t u64 - -#define BLKIF_OP_READ 0 -#define BLKIF_OP_WRITE 1 -#define BLKIF_OP_PROBE 2 - -/* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */ -#define BLKIF_RING_SIZE 64 - -/* - * Maximum scatter/gather segments per request. - * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE. - * NB. This could be 12 if the ring indexes weren't stored in the same page. 
- */ -#define BLKIF_REQUEST_MAX_SEGMENTS 11 - -typedef struct { - unsigned char operation; /* BLKIF_OP_??? */ - unsigned char nr_segments; /* number of segments (<= MAX_BLK_SEGS) */ - blkif_vdev_t device; /* only for read/write requests */ - unsigned long id; /* private guest value, echoed in resp */ - xen_sector_t sector_number; /* start sector idx on disk (r/w only) */ - /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */ - unsigned long buffer_and_sects[MAX_BLK_SEGS]; -} blkif_request_t; - -typedef struct { - unsigned long id; /* copied from request */ - unsigned char operation; /* copied from request */ - int status; /* BLKIF_RSP_??? */ -} blkif_response_t; - -#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */ -#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */ - -/* - * We use a special capitalised type name because it is _essential_ that all - * arithmetic on indexes is done on an integer type of the correct size. - */ -typedef unsigned int BLKIF_RING_IDX; - -/* - * Ring indexes are 'free running'. That is, they are not stored modulo the - * size of the ring buffer. The following macro converts a free-running counter - * into a value that can directly index a ring-buffer array. - */ -#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1)) - -typedef struct { - BLKIF_RING_IDX req_prod; /* Request producer. Updated by guest OS. */ - BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen. */ - union { - blkif_request_t req; - blkif_response_t resp; - } ring[BLKIF_RING_SIZE]; -} blkif_ring_t; - - -/* - * BLKIF_OP_PROBE: - * The request format for a probe request is constrained as follows: - * @operation == BLKIF_OP_PROBE - * @nr_segments == size of probe buffer in pages - * @device == unused (zero) - * @id == any value (echoed in response message) - * @sector_num == unused (zero) - * @buffer_and_sects == list of page-aligned, page-sized buffers. - * (i.e., nr_sects == 8). 
- * - * The response is a list of vdisk_t elements copied into the out-of-band - * probe buffer. On success the response status field contains the number - * of vdisk_t elements. - */ - -/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. */ -#define VDISK_TYPE_FLOPPY 0x00 -#define VDISK_TYPE_TAPE 0x01 -#define VDISK_TYPE_CDROM 0x05 -#define VDISK_TYPE_OPTICAL 0x07 -#define VDISK_TYPE_DISK 0x20 - -#define VDISK_TYPE_MASK 0x3F -#define VDISK_TYPE(_x) ((_x) & VDISK_TYPE_MASK) - -/* The top two bits of the type field encode various flags. */ -#define VDISK_FLAG_RO 0x40 -#define VDISK_FLAG_VIRT 0x80 -#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO) -#define VDISK_VIRTUAL(_x) ((_x) & VDISK_FLAG_VIRT) - -typedef struct { - blkif_vdev_t device; /* Device number (opaque 16 bit value). */ - unsigned short info; /* Device type and flags (VDISK_*). */ - xen_sector_t capacity; /* Size in terms of 512-byte sectors. */ -} vdisk_t; - -#endif /* __SHARED_VBLKIF_H__ */ diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile deleted file mode 100644 index 20c8192d3d..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile +++ /dev/null @@ -1,10 +0,0 @@ - -O_TARGET := drv.o - -subdir-y += frontend -obj-y += frontend/drv.o - -subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend -obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o - -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile deleted file mode 100644 index 032d02d7cc..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := main.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c deleted file mode 
100644 index b0e77ab522..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c +++ /dev/null @@ -1,26 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/vnetif/backend/main.c - * - * Back-end of the driver for virtual block devices. This portion of the - * driver exports a 'unified' block-device interface that can be accessed - * by any operating system that implements a compatible front end. A - * reference front-end implementation can be found in: - * arch/xen/drivers/vnetif/frontend - * - * Copyright (c) 2004, K A Fraser - */ - -#include -#include - -static int __init init_module(void) -{ - return 0; -} - -static void cleanup_module(void) -{ -} - -module_init(init_module); -module_exit(cleanup_module); diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile deleted file mode 100644 index 304c2e78ef..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -O_TARGET := drv.o -obj-y := vnetif.o -include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c deleted file mode 100644 index d1a4b21ad0..0000000000 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c +++ /dev/null @@ -1,565 +0,0 @@ -/****************************************************************************** - * vnetif.c - * - * Virtual network driver for XenoLinux. 
- * - * Copyright (c) 2002-2004, K A Fraser - */ - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ - -static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); -static void network_tx_buf_gc(struct net_device *dev); -static void network_alloc_rx_buffers(struct net_device *dev); -static void cleanup_module(void); - -/* Dynamically-mapped IRQs. */ -static int network_irq, debug_irq; - -static struct list_head dev_list; - -struct net_private -{ - struct list_head list; - struct net_device *dev; - - struct net_device_stats stats; - NET_RING_IDX rx_resp_cons, tx_resp_cons; - unsigned int net_ring_fixmap_idx, tx_full; - net_ring_t *net_ring; - net_idx_t *net_idx; - spinlock_t tx_lock; - unsigned int idx; /* Domain-specific index of this VIF. */ - - unsigned int rx_bufs_to_notify; - -#define STATE_ACTIVE 0 -#define STATE_SUSPENDED 1 -#define STATE_CLOSED 2 - unsigned int state; - - /* - * {tx,rx}_skbs store outstanding skbuffs. The first entry in each - * array is an index into a chain of free entries. - */ - struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1]; -}; - -/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. 
*/ -#define ADD_ID_TO_FREELIST(_list, _id) \ - (_list)[(_id)] = (_list)[0]; \ - (_list)[0] = (void *)(unsigned long)(_id); -#define GET_ID_FROM_FREELIST(_list) \ - ({ unsigned long _id = (unsigned long)(_list)[0]; \ - (_list)[0] = (_list)[_id]; \ - (unsigned short)_id; }) - - -static void _dbg_network_int(struct net_device *dev) -{ - struct net_private *np = dev->priv; - - if ( np->state == STATE_CLOSED ) - return; - - printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x," - " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x," - " tx_event=0x%08x, state=%d\n", - np->tx_full, np->tx_resp_cons, - np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, - np->net_idx->tx_event, - test_bit(__LINK_STATE_XOFF, &dev->state)); - printk(KERN_ALERT "net: rx_resp_cons=0x%08x," - " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n", - np->rx_resp_cons, np->net_idx->rx_req_prod, - np->net_idx->rx_resp_prod, np->net_idx->rx_event); -} - - -static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs) -{ - struct list_head *ent; - struct net_private *np; - list_for_each ( ent, &dev_list ) - { - np = list_entry(ent, struct net_private, list); - _dbg_network_int(np->dev); - } -} - - -static int network_open(struct net_device *dev) -{ - struct net_private *np = dev->priv; - netop_t netop; - int i, ret; - - netop.cmd = NETOP_RESET_RINGS; - netop.vif = np->idx; - if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) - { - printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n"); - return ret; - } - - netop.cmd = NETOP_GET_VIF_INFO; - netop.vif = np->idx; - if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) - { - printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx); - return ret; - } - - memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); - - set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, - netop.u.get_vif_info.ring_mfn << PAGE_SHIFT); - np->net_ring = (net_ring_t *)fix_to_virt( - FIX_NETRING0_BASE + np->net_ring_fixmap_idx); - 
np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx]; - - np->rx_bufs_to_notify = 0; - np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; - memset(&np->stats, 0, sizeof(np->stats)); - spin_lock_init(&np->tx_lock); - memset(np->net_ring, 0, sizeof(*np->net_ring)); - memset(np->net_idx, 0, sizeof(*np->net_idx)); - - /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ - for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ ) - np->tx_skbs[i] = (void *)(i+1); - for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ ) - np->rx_skbs[i] = (void *)(i+1); - - wmb(); - np->state = STATE_ACTIVE; - - network_alloc_rx_buffers(dev); - - netif_start_queue(dev); - - MOD_INC_USE_COUNT; - - return 0; -} - - -static void network_tx_buf_gc(struct net_device *dev) -{ - NET_RING_IDX i, prod; - unsigned short id; - struct net_private *np = dev->priv; - struct sk_buff *skb; - tx_entry_t *tx_ring = np->net_ring->tx_ring; - - do { - prod = np->net_idx->tx_resp_prod; - - for ( i = np->tx_resp_cons; i != prod; i++ ) - { - id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; - skb = np->tx_skbs[id]; - ADD_ID_TO_FREELIST(np->tx_skbs, id); - dev_kfree_skb_any(skb); - } - - np->tx_resp_cons = prod; - - /* - * Set a new event, then check for race with update of tx_cons. Note - * that it is essential to schedule a callback, no matter how few - * buffers are pending. Even if there is space in the transmit ring, - * higher layers may be blocked because too much data is outstanding: - * in such cases notification from Xen is likely to be the only kick - * that we'll get. 
- */ - np->net_idx->tx_event = - prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; - mb(); - } - while ( prod != np->net_idx->tx_resp_prod ); - - if ( np->tx_full && - ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) ) - { - np->tx_full = 0; - if ( np->state == STATE_ACTIVE ) - netif_wake_queue(dev); - } -} - - -static inline pte_t *get_ppte(void *addr) -{ - pgd_t *pgd; pmd_t *pmd; pte_t *pte; - pgd = pgd_offset_k( (unsigned long)addr); - pmd = pmd_offset(pgd, (unsigned long)addr); - pte = pte_offset(pmd, (unsigned long)addr); - return pte; -} - - -static void network_alloc_rx_buffers(struct net_device *dev) -{ - unsigned short id; - struct net_private *np = dev->priv; - struct sk_buff *skb; - netop_t netop; - NET_RING_IDX i = np->net_idx->rx_req_prod; - - if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || - unlikely(np->state != STATE_ACTIVE) ) - return; - - do { - skb = dev_alloc_skb(RX_BUF_SIZE); - if ( unlikely(skb == NULL) ) - break; - - skb->dev = dev; - - if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) - panic("alloc_skb needs to provide us page-aligned buffers."); - - id = GET_ID_FROM_FREELIST(np->rx_skbs); - np->rx_skbs[id] = skb; - - np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; - np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = - virt_to_machine(get_ppte(skb->head)); - - np->rx_bufs_to_notify++; - } - while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE ); - - /* - * We may have allocated buffers which have entries outstanding in the page - * update queue -- make sure we flush those first! - */ - flush_page_update_queue(); - - np->net_idx->rx_req_prod = i; - np->net_idx->rx_event = np->rx_resp_cons + 1; - - /* Batch Xen notifications. 
*/ - if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) ) - { - netop.cmd = NETOP_PUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - np->rx_bufs_to_notify = 0; - } -} - - -static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - unsigned short id; - struct net_private *np = (struct net_private *)dev->priv; - tx_req_entry_t *tx; - netop_t netop; - NET_RING_IDX i; - - if ( unlikely(np->tx_full) ) - { - printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); - netif_stop_queue(dev); - return -ENOBUFS; - } - - if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= - PAGE_SIZE) ) - { - struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); - if ( unlikely(new_skb == NULL) ) - return 1; - skb_put(new_skb, skb->len); - memcpy(new_skb->data, skb->data, skb->len); - dev_kfree_skb(skb); - skb = new_skb; - } - - spin_lock_irq(&np->tx_lock); - - i = np->net_idx->tx_req_prod; - - id = GET_ID_FROM_FREELIST(np->tx_skbs); - np->tx_skbs[id] = skb; - - tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; - - tx->id = id; - tx->addr = phys_to_machine(virt_to_phys(skb->data)); - tx->size = skb->len; - - wmb(); - np->net_idx->tx_req_prod = i + 1; - - network_tx_buf_gc(dev); - - if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) ) - { - np->tx_full = 1; - netif_stop_queue(dev); - } - - spin_unlock_irq(&np->tx_lock); - - np->stats.tx_bytes += skb->len; - np->stats.tx_packets++; - - /* Only notify Xen if there are no outstanding responses. 
*/ - mb(); - if ( np->net_idx->tx_resp_prod == i ) - { - netop.cmd = NETOP_PUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - } - - return 0; -} - - -static inline void _network_interrupt(struct net_device *dev) -{ - struct net_private *np = dev->priv; - unsigned long flags; - struct sk_buff *skb; - rx_resp_entry_t *rx; - NET_RING_IDX i; - - if ( unlikely(np->state == STATE_CLOSED) ) - return; - - spin_lock_irqsave(&np->tx_lock, flags); - network_tx_buf_gc(dev); - spin_unlock_irqrestore(&np->tx_lock, flags); - - again: - for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) - { - rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; - - skb = np->rx_skbs[rx->id]; - ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); - - if ( unlikely(rx->status != RING_STATUS_OK) ) - { - /* Gate this error. We get a (valid) slew of them on suspend. */ - if ( np->state == STATE_ACTIVE ) - printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); - dev_kfree_skb_any(skb); - continue; - } - - /* - * Set up shinfo -- from alloc_skb This was particularily nasty: the - * shared info is hidden at the back of the data area (presumably so it - * can be shared), but on page flip it gets very spunked. - */ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = - (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; - - skb->data = skb->tail = skb->head + rx->offset; - skb_put(skb, rx->size); - skb->protocol = eth_type_trans(skb, dev); - - np->stats.rx_packets++; - - np->stats.rx_bytes += rx->size; - netif_rx(skb); - dev->last_rx = jiffies; - } - - np->rx_resp_cons = i; - - network_alloc_rx_buffers(dev); - - /* Deal with hypervisor racing our resetting of rx_event. 
*/ - mb(); - if ( np->net_idx->rx_resp_prod != i ) - goto again; -} - - -static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs) -{ - struct list_head *ent; - struct net_private *np; - list_for_each ( ent, &dev_list ) - { - np = list_entry(ent, struct net_private, list); - _network_interrupt(np->dev); - } -} - - -static int network_close(struct net_device *dev) -{ - struct net_private *np = dev->priv; - netop_t netop; - - np->state = STATE_SUSPENDED; - wmb(); - - netif_stop_queue(np->dev); - - netop.cmd = NETOP_FLUSH_BUFFERS; - netop.vif = np->idx; - (void)HYPERVISOR_net_io_op(&netop); - - while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) || - (np->tx_resp_cons != np->net_idx->tx_req_prod) ) - { - barrier(); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } - - wmb(); - np->state = STATE_CLOSED; - wmb(); - - /* Now no longer safe to take interrupts for this device. */ - clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx); - - MOD_DEC_USE_COUNT; - - return 0; -} - - -static struct net_device_stats *network_get_stats(struct net_device *dev) -{ - struct net_private *np = (struct net_private *)dev->priv; - return &np->stats; -} - - -static int __init init_module(void) -{ -#if 0 - int i, fixmap_idx=-1, err; - struct net_device *dev; - struct net_private *np; - netop_t netop; - - INIT_LIST_HEAD(&dev_list); - - network_irq = bind_virq_to_irq(VIRQ_NET); - debug_irq = bind_virq_to_irq(VIRQ_DEBUG); - - err = request_irq(network_irq, network_interrupt, - SA_SAMPLE_RANDOM, "network", NULL); - if ( err ) - { - printk(KERN_WARNING "Could not allocate network interrupt\n"); - goto fail; - } - - err = request_irq(debug_irq, dbg_network_int, - SA_SHIRQ, "net_dbg", &dbg_network_int); - if ( err ) - printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); - - for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) - { - /* If the VIF is invalid then the query hypercall will fail. 
*/ - netop.cmd = NETOP_GET_VIF_INFO; - netop.vif = i; - if ( HYPERVISOR_net_io_op(&netop) != 0 ) - continue; - - /* We actually only support up to 4 vifs right now. */ - if ( ++fixmap_idx == 4 ) - break; - - dev = alloc_etherdev(sizeof(struct net_private)); - if ( dev == NULL ) - { - err = -ENOMEM; - goto fail; - } - - np = dev->priv; - np->state = STATE_CLOSED; - np->net_ring_fixmap_idx = fixmap_idx; - np->idx = i; - - SET_MODULE_OWNER(dev); - dev->open = network_open; - dev->hard_start_xmit = network_start_xmit; - dev->stop = network_close; - dev->get_stats = network_get_stats; - - memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); - - if ( (err = register_netdev(dev)) != 0 ) - { - kfree(dev); - goto fail; - } - - np->dev = dev; - list_add(&np->list, &dev_list); - } - - return 0; - - fail: - cleanup_module(); - return err; -#endif - return 0; -} - - -static void cleanup_module(void) -{ - struct net_private *np; - struct net_device *dev; - - while ( !list_empty(&dev_list) ) - { - np = list_entry(dev_list.next, struct net_private, list); - list_del(&np->list); - dev = np->dev; - unregister_netdev(dev); - kfree(dev); - } - - free_irq(network_irq, NULL); - free_irq(debug_irq, NULL); - - unbind_virq_from_irq(VIRQ_NET); - unbind_virq_from_irq(VIRQ_DEBUG); -} - - -module_init(init_module); -module_exit(cleanup_module);